blob: 793cc8879360aa5594d9c2a9c094b2d6e29c50ac [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
7
8#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000012static PyStringObject *characters[UCHAR_MAX + 1];
13static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000014
15/* This dictionary holds all interned strings. Note that references to
16 strings in this dictionary are *not* counted in the string's ob_refcnt.
17 When the interned string reaches a refcnt of 0 the string deallocation
18 function will delete the reference from this dictionary.
19
20 Another way to look at this is that to say that the actual reference
21 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
22*/
23static PyObject *interned;
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is either NULL or
   else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
51PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000053{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000054 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000055 if (size < 0) {
56 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000058 return NULL;
59 }
60 if (size == 0 && (op = nullstring) != NULL) {
61#ifdef COUNT_ALLOCS
62 null_strings++;
63#endif
64 Py_INCREF(op);
65 return (PyObject *)op;
66 }
67 if (size == 1 && str != NULL &&
68 (op = characters[*str & UCHAR_MAX]) != NULL)
69 {
70#ifdef COUNT_ALLOCS
71 one_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76
77 /* Inline PyObject_NewVar */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000078 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Christian Heimes44720832008-05-26 13:01:01 +000079 if (op == NULL)
80 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000082 op->ob_shash = -1;
83 op->ob_sstate = SSTATE_NOT_INTERNED;
84 if (str != NULL)
85 Py_MEMCPY(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
87 /* share short strings */
88 if (size == 0) {
89 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +000090 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000091 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +000092 nullstring = op;
93 Py_INCREF(op);
94 } else if (size == 1 && str != NULL) {
95 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +000096 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000097 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +000098 characters[*str & UCHAR_MAX] = op;
99 Py_INCREF(op);
100 }
101 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000102}
103
Christian Heimes44720832008-05-26 13:01:01 +0000104PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000105PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000106{
Christian Heimes44720832008-05-26 13:01:01 +0000107 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000108 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000109
110 assert(str != NULL);
111 size = strlen(str);
112 if (size > PY_SSIZE_T_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
121 Py_INCREF(op);
122 return (PyObject *)op;
123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
128 Py_INCREF(op);
129 return (PyObject *)op;
130 }
131
132 /* Inline PyObject_NewVar */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000133 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Christian Heimes44720832008-05-26 13:01:01 +0000134 if (op == NULL)
135 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000136 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000137 op->ob_shash = -1;
138 op->ob_sstate = SSTATE_NOT_INTERNED;
139 Py_MEMCPY(op->ob_sval, str, size+1);
140 /* share short strings */
141 if (size == 0) {
142 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000143 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000144 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000145 nullstring = op;
146 Py_INCREF(op);
147 } else if (size == 1) {
148 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000149 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000150 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000151 characters[*str & UCHAR_MAX] = op;
152 Py_INCREF(op);
153 }
154 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000155}
156
Christian Heimes44720832008-05-26 13:01:01 +0000157PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000158PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000159{
Christian Heimes44720832008-05-26 13:01:01 +0000160 va_list count;
161 Py_ssize_t n = 0;
162 const char* f;
163 char *s;
164 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165
Christian Heimes44720832008-05-26 13:01:01 +0000166#ifdef VA_LIST_IS_ARRAY
167 Py_MEMCPY(count, vargs, sizeof(va_list));
168#else
169#ifdef __va_copy
170 __va_copy(count, vargs);
171#else
172 count = vargs;
173#endif
174#endif
175 /* step 1: figure out how large a buffer we need */
176 for (f = format; *f; f++) {
177 if (*f == '%') {
178 const char* p = f;
179 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
180 ;
181
182 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
183 * they don't affect the amount of space we reserve.
184 */
185 if ((*f == 'l' || *f == 'z') &&
186 (f[1] == 'd' || f[1] == 'u'))
187 ++f;
188
189 switch (*f) {
190 case 'c':
191 (void)va_arg(count, int);
192 /* fall through... */
193 case '%':
194 n++;
195 break;
196 case 'd': case 'u': case 'i': case 'x':
197 (void) va_arg(count, int);
198 /* 20 bytes is enough to hold a 64-bit
199 integer. Decimal takes the most space.
200 This isn't enough for octal. */
201 n += 20;
202 break;
203 case 's':
204 s = va_arg(count, char*);
205 n += strlen(s);
206 break;
207 case 'p':
208 (void) va_arg(count, int);
209 /* maximum 64-bit pointer representation:
210 * 0xffffffffffffffff
211 * so 19 characters is enough.
212 * XXX I count 18 -- what's the extra for?
213 */
214 n += 19;
215 break;
216 default:
217 /* if we stumble upon an unknown
218 formatting code, copy the rest of
219 the format string to the output
220 string. (we cannot just skip the
221 code, since there's no way to know
222 what's in the argument list) */
223 n += strlen(p);
224 goto expand;
225 }
226 } else
227 n++;
228 }
229 expand:
230 /* step 2: fill the buffer */
231 /* Since we've analyzed how much space we need for the worst case,
232 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000233 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000234 if (!string)
235 return NULL;
236
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000237 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000238
239 for (f = format; *f; f++) {
240 if (*f == '%') {
241 const char* p = f++;
242 Py_ssize_t i;
243 int longflag = 0;
244 int size_tflag = 0;
245 /* parse the width.precision part (we're only
246 interested in the precision value, if any) */
247 n = 0;
248 while (isdigit(Py_CHARMASK(*f)))
249 n = (n*10) + *f++ - '0';
250 if (*f == '.') {
251 f++;
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 }
256 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
257 f++;
258 /* handle the long flag, but only for %ld and %lu.
259 others can be added when necessary. */
260 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
261 longflag = 1;
262 ++f;
263 }
264 /* handle the size_t flag. */
265 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
266 size_tflag = 1;
267 ++f;
268 }
269
270 switch (*f) {
271 case 'c':
272 *s++ = va_arg(vargs, int);
273 break;
274 case 'd':
275 if (longflag)
276 sprintf(s, "%ld", va_arg(vargs, long));
277 else if (size_tflag)
278 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
279 va_arg(vargs, Py_ssize_t));
280 else
281 sprintf(s, "%d", va_arg(vargs, int));
282 s += strlen(s);
283 break;
284 case 'u':
285 if (longflag)
286 sprintf(s, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(s, "%u",
293 va_arg(vargs, unsigned int));
294 s += strlen(s);
295 break;
296 case 'i':
297 sprintf(s, "%i", va_arg(vargs, int));
298 s += strlen(s);
299 break;
300 case 'x':
301 sprintf(s, "%x", va_arg(vargs, int));
302 s += strlen(s);
303 break;
304 case 's':
305 p = va_arg(vargs, char*);
306 i = strlen(p);
307 if (n > 0 && i > n)
308 i = n;
309 Py_MEMCPY(s, p, i);
310 s += i;
311 break;
312 case 'p':
313 sprintf(s, "%p", va_arg(vargs, void*));
314 /* %p is ill-defined: ensure leading 0x. */
315 if (s[1] == 'X')
316 s[1] = 'x';
317 else if (s[1] != 'x') {
318 memmove(s+2, s, strlen(s)+1);
319 s[0] = '0';
320 s[1] = 'x';
321 }
322 s += strlen(s);
323 break;
324 case '%':
325 *s++ = '%';
326 break;
327 default:
328 strcpy(s, p);
329 s += strlen(s);
330 goto end;
331 }
332 } else
333 *s++ = *f;
334 }
335
336 end:
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000337 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Christian Heimes44720832008-05-26 13:01:01 +0000338 return string;
339}
340
341PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000342PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000343{
344 PyObject* ret;
345 va_list vargs;
346
347#ifdef HAVE_STDARG_PROTOTYPES
348 va_start(vargs, format);
349#else
350 va_start(vargs);
351#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000352 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000353 va_end(vargs);
354 return ret;
355}
356
357
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000358PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000359 Py_ssize_t size,
360 const char *encoding,
361 const char *errors)
362{
363 PyObject *v, *str;
364
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000365 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000366 if (str == NULL)
367 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000368 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000369 Py_DECREF(str);
370 return v;
371}
372
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000373PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000374 const char *encoding,
375 const char *errors)
376{
377 PyObject *v;
378
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000379 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000380 PyErr_BadArgument();
381 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000382 }
383
Christian Heimes44720832008-05-26 13:01:01 +0000384 if (encoding == NULL) {
385#ifdef Py_USING_UNICODE
386 encoding = PyUnicode_GetDefaultEncoding();
387#else
388 PyErr_SetString(PyExc_ValueError, "no encoding specified");
389 goto onError;
390#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 /* Decode via the codec registry */
394 v = PyCodec_Decode(str, encoding, errors);
395 if (v == NULL)
396 goto onError;
397
398 return v;
399
400 onError:
401 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000402}
403
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000404PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000405 const char *encoding,
406 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000407{
Christian Heimes44720832008-05-26 13:01:01 +0000408 PyObject *v;
409
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000410 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000411 if (v == NULL)
412 goto onError;
413
414#ifdef Py_USING_UNICODE
415 /* Convert Unicode to a string using the default encoding */
416 if (PyUnicode_Check(v)) {
417 PyObject *temp = v;
418 v = PyUnicode_AsEncodedString(v, NULL, NULL);
419 Py_DECREF(temp);
420 if (v == NULL)
421 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000422 }
Christian Heimes44720832008-05-26 13:01:01 +0000423#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000424 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000425 PyErr_Format(PyExc_TypeError,
426 "decoder did not return a string object (type=%.400s)",
427 Py_TYPE(v)->tp_name);
428 Py_DECREF(v);
429 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000430 }
Christian Heimes44720832008-05-26 13:01:01 +0000431
432 return v;
433
434 onError:
435 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000436}
437
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000438PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000439 Py_ssize_t size,
440 const char *encoding,
441 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000442{
Christian Heimes44720832008-05-26 13:01:01 +0000443 PyObject *v, *str;
444
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000445 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000446 if (str == NULL)
447 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000448 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000449 Py_DECREF(str);
450 return v;
451}
452
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000453PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000454 const char *encoding,
455 const char *errors)
456{
457 PyObject *v;
458
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000459 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000460 PyErr_BadArgument();
461 goto onError;
462 }
463
464 if (encoding == NULL) {
465#ifdef Py_USING_UNICODE
466 encoding = PyUnicode_GetDefaultEncoding();
467#else
468 PyErr_SetString(PyExc_ValueError, "no encoding specified");
469 goto onError;
470#endif
471 }
472
473 /* Encode via the codec registry */
474 v = PyCodec_Encode(str, encoding, errors);
475 if (v == NULL)
476 goto onError;
477
478 return v;
479
480 onError:
481 return NULL;
482}
483
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000484PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000485 const char *encoding,
486 const char *errors)
487{
488 PyObject *v;
489
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000490 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000491 if (v == NULL)
492 goto onError;
493
494#ifdef Py_USING_UNICODE
495 /* Convert Unicode to a string using the default encoding */
496 if (PyUnicode_Check(v)) {
497 PyObject *temp = v;
498 v = PyUnicode_AsEncodedString(v, NULL, NULL);
499 Py_DECREF(temp);
500 if (v == NULL)
501 goto onError;
502 }
503#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000504 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000505 PyErr_Format(PyExc_TypeError,
506 "encoder did not return a string object (type=%.400s)",
507 Py_TYPE(v)->tp_name);
508 Py_DECREF(v);
509 goto onError;
510 }
511
512 return v;
513
514 onError:
515 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000516}
517
518static void
Christian Heimes44720832008-05-26 13:01:01 +0000519string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000520{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000521 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000522 case SSTATE_NOT_INTERNED:
523 break;
524
525 case SSTATE_INTERNED_MORTAL:
526 /* revive dead object temporarily for DelItem */
527 Py_REFCNT(op) = 3;
528 if (PyDict_DelItem(interned, op) != 0)
529 Py_FatalError(
530 "deletion of interned string failed");
531 break;
532
533 case SSTATE_INTERNED_IMMORTAL:
534 Py_FatalError("Immortal interned string died.");
535
536 default:
537 Py_FatalError("Inconsistent interned string state.");
538 }
539 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000540}
541
Christian Heimes44720832008-05-26 13:01:01 +0000542/* Unescape a backslash-escaped string. If unicode is non-zero,
543 the string is a u-literal. If recode_encoding is non-zero,
544 the string is UTF-8 encoded and should be re-encoded in the
545 specified encoding. */
546
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000547PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000548 Py_ssize_t len,
549 const char *errors,
550 Py_ssize_t unicode,
551 const char *recode_encoding)
552{
553 int c;
554 char *p, *buf;
555 const char *end;
556 PyObject *v;
557 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000558 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000559 if (v == NULL)
560 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000561 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000562 end = s + len;
563 while (s < end) {
564 if (*s != '\\') {
565 non_esc:
566#ifdef Py_USING_UNICODE
567 if (recode_encoding && (*s & 0x80)) {
568 PyObject *u, *w;
569 char *r;
570 const char* t;
571 Py_ssize_t rn;
572 t = s;
573 /* Decode non-ASCII bytes as UTF-8. */
574 while (t < end && (*t & 0x80)) t++;
575 u = PyUnicode_DecodeUTF8(s, t - s, errors);
576 if(!u) goto failed;
577
578 /* Recode them in target encoding. */
579 w = PyUnicode_AsEncodedString(
580 u, recode_encoding, errors);
581 Py_DECREF(u);
582 if (!w) goto failed;
583
584 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000585 assert(PyString_Check(w));
586 r = PyString_AS_STRING(w);
587 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000588 Py_MEMCPY(p, r, rn);
589 p += rn;
590 Py_DECREF(w);
591 s = t;
592 } else {
593 *p++ = *s++;
594 }
595#else
596 *p++ = *s++;
597#endif
598 continue;
599 }
600 s++;
601 if (s==end) {
602 PyErr_SetString(PyExc_ValueError,
603 "Trailing \\ in string");
604 goto failed;
605 }
606 switch (*s++) {
607 /* XXX This assumes ASCII! */
608 case '\n': break;
609 case '\\': *p++ = '\\'; break;
610 case '\'': *p++ = '\''; break;
611 case '\"': *p++ = '\"'; break;
612 case 'b': *p++ = '\b'; break;
613 case 'f': *p++ = '\014'; break; /* FF */
614 case 't': *p++ = '\t'; break;
615 case 'n': *p++ = '\n'; break;
616 case 'r': *p++ = '\r'; break;
617 case 'v': *p++ = '\013'; break; /* VT */
618 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
619 case '0': case '1': case '2': case '3':
620 case '4': case '5': case '6': case '7':
621 c = s[-1] - '0';
622 if (s < end && '0' <= *s && *s <= '7') {
623 c = (c<<3) + *s++ - '0';
624 if (s < end && '0' <= *s && *s <= '7')
625 c = (c<<3) + *s++ - '0';
626 }
627 *p++ = c;
628 break;
629 case 'x':
630 if (s+1 < end &&
631 isxdigit(Py_CHARMASK(s[0])) &&
632 isxdigit(Py_CHARMASK(s[1])))
633 {
634 unsigned int x = 0;
635 c = Py_CHARMASK(*s);
636 s++;
637 if (isdigit(c))
638 x = c - '0';
639 else if (islower(c))
640 x = 10 + c - 'a';
641 else
642 x = 10 + c - 'A';
643 x = x << 4;
644 c = Py_CHARMASK(*s);
645 s++;
646 if (isdigit(c))
647 x += c - '0';
648 else if (islower(c))
649 x += 10 + c - 'a';
650 else
651 x += 10 + c - 'A';
652 *p++ = x;
653 break;
654 }
655 if (!errors || strcmp(errors, "strict") == 0) {
656 PyErr_SetString(PyExc_ValueError,
657 "invalid \\x escape");
658 goto failed;
659 }
660 if (strcmp(errors, "replace") == 0) {
661 *p++ = '?';
662 } else if (strcmp(errors, "ignore") == 0)
663 /* do nothing */;
664 else {
665 PyErr_Format(PyExc_ValueError,
666 "decoding error; "
667 "unknown error handling code: %.400s",
668 errors);
669 goto failed;
670 }
671#ifndef Py_USING_UNICODE
672 case 'u':
673 case 'U':
674 case 'N':
675 if (unicode) {
676 PyErr_SetString(PyExc_ValueError,
677 "Unicode escapes not legal "
678 "when Unicode disabled");
679 goto failed;
680 }
681#endif
682 default:
683 *p++ = '\\';
684 s--;
685 goto non_esc; /* an arbitry number of unescaped
686 UTF-8 bytes may follow. */
687 }
688 }
689 if (p-buf < newlen)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000690 _PyString_Resize(&v, p - buf);
Christian Heimes44720832008-05-26 13:01:01 +0000691 return v;
692 failed:
693 Py_DECREF(v);
694 return NULL;
695}
696
697/* -------------------------------------------------------------------- */
698/* object api */
699
Christian Heimes1a6387e2008-03-26 12:49:49 +0000700static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000701string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000702{
Christian Heimes44720832008-05-26 13:01:01 +0000703 char *s;
704 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000705 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000706 return -1;
707 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000708}
709
Christian Heimes44720832008-05-26 13:01:01 +0000710static /*const*/ char *
711string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000712{
Christian Heimes44720832008-05-26 13:01:01 +0000713 char *s;
714 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000715 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000716 return NULL;
717 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000718}
719
720Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000721PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000722{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000723 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000724 return string_getsize(op);
725 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000726}
727
Christian Heimes44720832008-05-26 13:01:01 +0000728/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000729PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000730{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000731 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000732 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000733 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000734}
735
736int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000737PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000738 register char **s,
739 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000740{
Christian Heimes44720832008-05-26 13:01:01 +0000741 if (s == NULL) {
742 PyErr_BadInternalCall();
743 return -1;
744 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000745
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000746 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000747#ifdef Py_USING_UNICODE
748 if (PyUnicode_Check(obj)) {
749 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
750 if (obj == NULL)
751 return -1;
752 }
753 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000754#endif
Christian Heimes44720832008-05-26 13:01:01 +0000755 {
756 PyErr_Format(PyExc_TypeError,
757 "expected string or Unicode object, "
758 "%.200s found", Py_TYPE(obj)->tp_name);
759 return -1;
760 }
761 }
762
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000763 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000764 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000765 *len = PyString_GET_SIZE(obj);
766 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000767 PyErr_SetString(PyExc_TypeError,
768 "expected string without null bytes");
769 return -1;
770 }
771 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000772}
773
Christian Heimes1a6387e2008-03-26 12:49:49 +0000774/* -------------------------------------------------------------------- */
775/* Methods */
776
Christian Heimes44720832008-05-26 13:01:01 +0000777#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000778#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000779
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780#include "stringlib/count.h"
781#include "stringlib/find.h"
782#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000784#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000785#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000786
Christian Heimes1a6387e2008-03-26 12:49:49 +0000787
788
789static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000790string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791{
Christian Heimes44720832008-05-26 13:01:01 +0000792 Py_ssize_t i, str_len;
793 char c;
794 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000795
Christian Heimes44720832008-05-26 13:01:01 +0000796 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000797 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000798 int ret;
799 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000800 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000801 if (op == NULL)
802 return -1;
803 ret = string_print(op, fp, flags);
804 Py_DECREF(op);
805 return ret;
806 }
807 if (flags & Py_PRINT_RAW) {
808 char *data = op->ob_sval;
809 Py_ssize_t size = Py_SIZE(op);
810 Py_BEGIN_ALLOW_THREADS
811 while (size > INT_MAX) {
812 /* Very long strings cannot be written atomically.
813 * But don't write exactly INT_MAX bytes at a time
814 * to avoid memory aligment issues.
815 */
816 const int chunk_size = INT_MAX & ~0x3FFF;
817 fwrite(data, 1, chunk_size, fp);
818 data += chunk_size;
819 size -= chunk_size;
820 }
821#ifdef __VMS
822 if (size) fwrite(data, (int)size, 1, fp);
823#else
824 fwrite(data, 1, (int)size, fp);
825#endif
826 Py_END_ALLOW_THREADS
827 return 0;
828 }
829
830 /* figure out which quote to use; single is preferred */
831 quote = '\'';
832 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
833 !memchr(op->ob_sval, '"', Py_SIZE(op)))
834 quote = '"';
835
836 str_len = Py_SIZE(op);
837 Py_BEGIN_ALLOW_THREADS
838 fputc(quote, fp);
839 for (i = 0; i < str_len; i++) {
840 /* Since strings are immutable and the caller should have a
841 reference, accessing the interal buffer should not be an issue
842 with the GIL released. */
843 c = op->ob_sval[i];
844 if (c == quote || c == '\\')
845 fprintf(fp, "\\%c", c);
846 else if (c == '\t')
847 fprintf(fp, "\\t");
848 else if (c == '\n')
849 fprintf(fp, "\\n");
850 else if (c == '\r')
851 fprintf(fp, "\\r");
852 else if (c < ' ' || c >= 0x7f)
853 fprintf(fp, "\\x%02x", c & 0xff);
854 else
855 fputc(c, fp);
856 }
857 fputc(quote, fp);
858 Py_END_ALLOW_THREADS
859 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000860}
861
Christian Heimes44720832008-05-26 13:01:01 +0000862PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000863PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000864{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000865 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000866 size_t newsize = 2 + 4 * Py_SIZE(op);
867 PyObject *v;
868 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
869 PyErr_SetString(PyExc_OverflowError,
870 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000871 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000872 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000873 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000874 if (v == NULL) {
875 return NULL;
876 }
877 else {
878 register Py_ssize_t i;
879 register char c;
880 register char *p;
881 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000882
Christian Heimes44720832008-05-26 13:01:01 +0000883 /* figure out which quote to use; single is preferred */
884 quote = '\'';
885 if (smartquotes &&
886 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
887 !memchr(op->ob_sval, '"', Py_SIZE(op)))
888 quote = '"';
889
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000890 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000891 *p++ = quote;
892 for (i = 0; i < Py_SIZE(op); i++) {
893 /* There's at least enough room for a hex escape
894 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000895 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000896 c = op->ob_sval[i];
897 if (c == quote || c == '\\')
898 *p++ = '\\', *p++ = c;
899 else if (c == '\t')
900 *p++ = '\\', *p++ = 't';
901 else if (c == '\n')
902 *p++ = '\\', *p++ = 'n';
903 else if (c == '\r')
904 *p++ = '\\', *p++ = 'r';
905 else if (c < ' ' || c >= 0x7f) {
906 /* For performance, we don't want to call
907 PyOS_snprintf here (extra layers of
908 function call). */
909 sprintf(p, "\\x%02x", c & 0xff);
910 p += 4;
911 }
912 else
913 *p++ = c;
914 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000915 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000916 *p++ = quote;
917 *p = '\0';
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000918 _PyString_Resize(
919 &v, (p - PyString_AS_STRING(v)));
Christian Heimes44720832008-05-26 13:01:01 +0000920 return v;
921 }
922}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923
924static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000925string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000926{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000927 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000928}
929
Christian Heimes1a6387e2008-03-26 12:49:49 +0000930static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000931string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000932{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000933 assert(PyString_Check(s));
934 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000935 Py_INCREF(s);
936 return s;
937 }
938 else {
939 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000940 PyStringObject *t = (PyStringObject *) s;
941 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +0000942 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000943}
944
Christian Heimes44720832008-05-26 13:01:01 +0000945static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000946string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +0000947{
948 return Py_SIZE(a);
949}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000950
Christian Heimes44720832008-05-26 13:01:01 +0000951static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000952string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +0000953{
954 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000955 register PyStringObject *op;
956 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +0000957#ifdef Py_USING_UNICODE
958 if (PyUnicode_Check(bb))
959 return PyUnicode_Concat((PyObject *)a, bb);
960#endif
961 if (PyByteArray_Check(bb))
962 return PyByteArray_Concat((PyObject *)a, bb);
963 PyErr_Format(PyExc_TypeError,
964 "cannot concatenate 'str' and '%.200s' objects",
965 Py_TYPE(bb)->tp_name);
966 return NULL;
967 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000968#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +0000969 /* Optimize cases with empty left or right operand */
970 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000971 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +0000972 if (Py_SIZE(a) == 0) {
973 Py_INCREF(bb);
974 return bb;
975 }
976 Py_INCREF(a);
977 return (PyObject *)a;
978 }
979 size = Py_SIZE(a) + Py_SIZE(b);
980 if (size < 0) {
981 PyErr_SetString(PyExc_OverflowError,
982 "strings are too large to concat");
983 return NULL;
984 }
985
986 /* Inline PyObject_NewVar */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000987 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Christian Heimes44720832008-05-26 13:01:01 +0000988 if (op == NULL)
989 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000990 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000991 op->ob_shash = -1;
992 op->ob_sstate = SSTATE_NOT_INTERNED;
993 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
994 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
995 op->ob_sval[size] = '\0';
996 return (PyObject *) op;
997#undef b
998}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000999
Christian Heimes44720832008-05-26 13:01:01 +00001000static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001001string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001002{
1003 register Py_ssize_t i;
1004 register Py_ssize_t j;
1005 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001006 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001007 size_t nbytes;
1008 if (n < 0)
1009 n = 0;
1010 /* watch out for overflows: the size can overflow int,
1011 * and the # of bytes needed can overflow size_t
1012 */
1013 size = Py_SIZE(a) * n;
1014 if (n && size / n != Py_SIZE(a)) {
1015 PyErr_SetString(PyExc_OverflowError,
1016 "repeated string is too long");
1017 return NULL;
1018 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001019 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001020 Py_INCREF(a);
1021 return (PyObject *)a;
1022 }
1023 nbytes = (size_t)size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001024 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001025 PyErr_SetString(PyExc_OverflowError,
1026 "repeated string is too long");
1027 return NULL;
1028 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001029 op = (PyStringObject *)
1030 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001031 if (op == NULL)
1032 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001033 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001034 op->ob_shash = -1;
1035 op->ob_sstate = SSTATE_NOT_INTERNED;
1036 op->ob_sval[size] = '\0';
1037 if (Py_SIZE(a) == 1 && n > 0) {
1038 memset(op->ob_sval, a->ob_sval[0] , n);
1039 return (PyObject *) op;
1040 }
1041 i = 0;
1042 if (i < size) {
1043 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1044 i = Py_SIZE(a);
1045 }
1046 while (i < size) {
1047 j = (i <= size-i) ? i : size-i;
1048 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1049 i += j;
1050 }
1051 return (PyObject *) op;
1052}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001053
Christian Heimes44720832008-05-26 13:01:01 +00001054/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1055
1056static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001057string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001058 register Py_ssize_t j)
1059 /* j -- may be negative! */
1060{
1061 if (i < 0)
1062 i = 0;
1063 if (j < 0)
1064 j = 0; /* Avoid signed/unsigned bug in next line */
1065 if (j > Py_SIZE(a))
1066 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001067 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001068 /* It's the same as a */
1069 Py_INCREF(a);
1070 return (PyObject *)a;
1071 }
1072 if (j < i)
1073 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001074 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001075}
1076
1077static int
1078string_contains(PyObject *str_obj, PyObject *sub_obj)
1079{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001080 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001081#ifdef Py_USING_UNICODE
1082 if (PyUnicode_Check(sub_obj))
1083 return PyUnicode_Contains(str_obj, sub_obj);
1084#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001085 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001086 PyErr_Format(PyExc_TypeError,
1087 "'in <string>' requires string as left operand, "
1088 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1089 return -1;
1090 }
1091 }
1092
1093 return stringlib_contains_obj(str_obj, sub_obj);
1094}
1095
1096static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001097string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001098{
1099 char pchar;
1100 PyObject *v;
1101 if (i < 0 || i >= Py_SIZE(a)) {
1102 PyErr_SetString(PyExc_IndexError, "string index out of range");
1103 return NULL;
1104 }
1105 pchar = a->ob_sval[i];
1106 v = (PyObject *)characters[pchar & UCHAR_MAX];
1107 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001108 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001109 else {
1110#ifdef COUNT_ALLOCS
1111 one_strings++;
1112#endif
1113 Py_INCREF(v);
1114 }
1115 return v;
1116}
1117
1118static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001119string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001120{
1121 int c;
1122 Py_ssize_t len_a, len_b;
1123 Py_ssize_t min_len;
1124 PyObject *result;
1125
1126 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001127 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001128 result = Py_NotImplemented;
1129 goto out;
1130 }
1131 if (a == b) {
1132 switch (op) {
1133 case Py_EQ:case Py_LE:case Py_GE:
1134 result = Py_True;
1135 goto out;
1136 case Py_NE:case Py_LT:case Py_GT:
1137 result = Py_False;
1138 goto out;
1139 }
1140 }
1141 if (op == Py_EQ) {
1142 /* Supporting Py_NE here as well does not save
1143 much time, since Py_NE is rarely used. */
1144 if (Py_SIZE(a) == Py_SIZE(b)
1145 && (a->ob_sval[0] == b->ob_sval[0]
1146 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1147 result = Py_True;
1148 } else {
1149 result = Py_False;
1150 }
1151 goto out;
1152 }
1153 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1154 min_len = (len_a < len_b) ? len_a : len_b;
1155 if (min_len > 0) {
1156 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1157 if (c==0)
1158 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1159 } else
1160 c = 0;
1161 if (c == 0)
1162 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1163 switch (op) {
1164 case Py_LT: c = c < 0; break;
1165 case Py_LE: c = c <= 0; break;
1166 case Py_EQ: assert(0); break; /* unreachable */
1167 case Py_NE: c = c != 0; break;
1168 case Py_GT: c = c > 0; break;
1169 case Py_GE: c = c >= 0; break;
1170 default:
1171 result = Py_NotImplemented;
1172 goto out;
1173 }
1174 result = c ? Py_True : Py_False;
1175 out:
1176 Py_INCREF(result);
1177 return result;
1178}
1179
1180int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001181_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001182{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001183 PyStringObject *a = (PyStringObject*) o1;
1184 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001185 return Py_SIZE(a) == Py_SIZE(b)
1186 && *a->ob_sval == *b->ob_sval
1187 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1188}
1189
1190static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001191string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001192{
1193 register Py_ssize_t len;
1194 register unsigned char *p;
1195 register long x;
1196
1197 if (a->ob_shash != -1)
1198 return a->ob_shash;
1199 len = Py_SIZE(a);
1200 p = (unsigned char *) a->ob_sval;
1201 x = *p << 7;
1202 while (--len >= 0)
1203 x = (1000003*x) ^ *p++;
1204 x ^= Py_SIZE(a);
1205 if (x == -1)
1206 x = -2;
1207 a->ob_shash = x;
1208 return x;
1209}
1210
1211static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001212string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001213{
1214 if (PyIndex_Check(item)) {
1215 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1216 if (i == -1 && PyErr_Occurred())
1217 return NULL;
1218 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001219 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001220 return string_item(self, i);
1221 }
1222 else if (PySlice_Check(item)) {
1223 Py_ssize_t start, stop, step, slicelength, cur, i;
1224 char* source_buf;
1225 char* result_buf;
1226 PyObject* result;
1227
1228 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001229 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001230 &start, &stop, &step, &slicelength) < 0) {
1231 return NULL;
1232 }
1233
1234 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001235 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001236 }
1237 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001238 slicelength == PyString_GET_SIZE(self) &&
1239 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001240 Py_INCREF(self);
1241 return (PyObject *)self;
1242 }
1243 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001244 return PyString_FromStringAndSize(
1245 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001246 slicelength);
1247 }
1248 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001249 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001250 result_buf = (char *)PyMem_Malloc(slicelength);
1251 if (result_buf == NULL)
1252 return PyErr_NoMemory();
1253
1254 for (cur = start, i = 0; i < slicelength;
1255 cur += step, i++) {
1256 result_buf[i] = source_buf[cur];
1257 }
1258
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001259 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001260 slicelength);
1261 PyMem_Free(result_buf);
1262 return result;
1263 }
1264 }
1265 else {
1266 PyErr_Format(PyExc_TypeError,
1267 "string indices must be integers, not %.200s",
1268 Py_TYPE(item)->tp_name);
1269 return NULL;
1270 }
1271}
1272
1273static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001274string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001275{
1276 if ( index != 0 ) {
1277 PyErr_SetString(PyExc_SystemError,
1278 "accessing non-existent string segment");
1279 return -1;
1280 }
1281 *ptr = (void *)self->ob_sval;
1282 return Py_SIZE(self);
1283}
1284
1285static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001286string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001287{
1288 PyErr_SetString(PyExc_TypeError,
1289 "Cannot use string as modifiable buffer");
1290 return -1;
1291}
1292
1293static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001294string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001295{
1296 if ( lenp )
1297 *lenp = Py_SIZE(self);
1298 return 1;
1299}
1300
1301static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001302string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001303{
1304 if ( index != 0 ) {
1305 PyErr_SetString(PyExc_SystemError,
1306 "accessing non-existent string segment");
1307 return -1;
1308 }
1309 *ptr = self->ob_sval;
1310 return Py_SIZE(self);
1311}
1312
1313static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001314string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001315{
1316 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1317 0, flags);
1318}
1319
1320static PySequenceMethods string_as_sequence = {
1321 (lenfunc)string_length, /*sq_length*/
1322 (binaryfunc)string_concat, /*sq_concat*/
1323 (ssizeargfunc)string_repeat, /*sq_repeat*/
1324 (ssizeargfunc)string_item, /*sq_item*/
1325 (ssizessizeargfunc)string_slice, /*sq_slice*/
1326 0, /*sq_ass_item*/
1327 0, /*sq_ass_slice*/
1328 (objobjproc)string_contains /*sq_contains*/
1329};
1330
1331static PyMappingMethods string_as_mapping = {
1332 (lenfunc)string_length,
1333 (binaryfunc)string_subscript,
1334 0,
1335};
1336
1337static PyBufferProcs string_as_buffer = {
1338 (readbufferproc)string_buffer_getreadbuf,
1339 (writebufferproc)string_buffer_getwritebuf,
1340 (segcountproc)string_buffer_getsegcount,
1341 (charbufferproc)string_buffer_getcharbuf,
1342 (getbufferproc)string_buffer_getbuffer,
1343 0, /* XXX */
1344};
1345
1346
1347
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string minus its leading
   three-character "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
1356
Christian Heimes1a6387e2008-03-26 12:49:49 +00001357
/* True when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared inline and memcmp() handles the bytes in between.

   Don't call if length < 2: the memcmp() length is `length - 2`.

   Every parameter is parenthesized so that expression arguments such as
   `p + 1` or `a + b` expand correctly. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1363
1364
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   element than the number of splits (5 splits gives 6 elements), capped
   at MAX_PREALLOC.  The parameter is parenthesized so that an expression
   argument (e.g. a conditional expression) is evaluated as a unit. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001377
/* Append data[left:right] to `list` as a new string.  Uses the caller's
   `str` and `list` variables and jumps to the caller's `onError` label
   if creating the string or appending it fails.  The temporary reference
   held in `str` is released in every case once the append has run. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    Py_DECREF(str);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001389
/* Add data[left:right] to `list` as element number `count`, then bump
   `count`.  While `count` is below MAX_PREALLOC the string is stored
   directly with PyList_SET_ITEM; from then on it is appended and the
   temporary reference in `str` is released.  Uses the caller's `str`,
   `list` and `count` variables and jumps to the caller's `onError`
   label on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    }                                                           \
    else {                                                      \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }

/* Always force the list to the expected size: the list's size field is
   set to the number of elements recorded in the caller's `count`. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1409
/* Cursor helpers for whitespace splitting.  `i` must be a modifiable
   index variable; it is advanced (SKIP_*) or moved backwards (RSKIP_*)
   while the byte s[i] is (or is not) whitespace according to isspace().
   The forward forms stop at `len`; the reverse forms stop once `i`
   drops below 0.  Parameters are parenthesized so that expression
   arguments expand as a unit. */
#define SKIP_SPACE(s, i, len) \
    { while ((i) < (len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) \
    { while ((i) < (len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i) \
    { while ((i) >= 0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i) \
    { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001414
1415Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001416split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001417{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001418 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001419 Py_ssize_t i, j, count=0;
1420 PyObject *str;
1421 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001422
Christian Heimes44720832008-05-26 13:01:01 +00001423 if (list == NULL)
1424 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001425
Christian Heimes44720832008-05-26 13:01:01 +00001426 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001427
Christian Heimes44720832008-05-26 13:01:01 +00001428 while (maxsplit-- > 0) {
1429 SKIP_SPACE(s, i, len);
1430 if (i==len) break;
1431 j = i; i++;
1432 SKIP_NONSPACE(s, i, len);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001433 if (j == 0 && i == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001434 /* No whitespace in self, so just use it as list[0] */
1435 Py_INCREF(self);
1436 PyList_SET_ITEM(list, 0, (PyObject *)self);
1437 count++;
1438 break;
1439 }
1440 SPLIT_ADD(s, j, i);
1441 }
1442
1443 if (i < len) {
1444 /* Only occurs when maxsplit was reached */
1445 /* Skip any remaining whitespace and copy to end of string */
1446 SKIP_SPACE(s, i, len);
1447 if (i != len)
1448 SPLIT_ADD(s, i, len);
1449 }
1450 FIX_PREALLOC_SIZE(list);
1451 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001452 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001453 Py_DECREF(list);
1454 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001455}
1456
Christian Heimes1a6387e2008-03-26 12:49:49 +00001457Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001458split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001459{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001460 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001461 register Py_ssize_t i, j, count=0;
1462 PyObject *str;
1463 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001464
Christian Heimes44720832008-05-26 13:01:01 +00001465 if (list == NULL)
1466 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001467
Christian Heimes44720832008-05-26 13:01:01 +00001468 i = j = 0;
1469 while ((j < len) && (maxcount-- > 0)) {
1470 for(; j<len; j++) {
1471 /* I found that using memchr makes no difference */
1472 if (s[j] == ch) {
1473 SPLIT_ADD(s, i, j);
1474 i = j = j + 1;
1475 break;
1476 }
1477 }
1478 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001479 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001480 /* ch not in self, so just use self as list[0] */
1481 Py_INCREF(self);
1482 PyList_SET_ITEM(list, 0, (PyObject *)self);
1483 count++;
1484 }
1485 else if (i <= len) {
1486 SPLIT_ADD(s, i, len);
1487 }
1488 FIX_PREALLOC_SIZE(list);
1489 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001490
1491 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001492 Py_DECREF(list);
1493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001494}
1495
1496PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001497"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001498\n\
Christian Heimes44720832008-05-26 13:01:01 +00001499Return a list of the words in the string S, using sep as the\n\
1500delimiter string. If maxsplit is given, at most maxsplit\n\
1501splits are done. If sep is not specified or is None, any\n\
1502whitespace string is a separator and empty strings are removed\n\
1503from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001504
1505static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001506string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001507{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001508 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001509 Py_ssize_t maxsplit = -1, count=0;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001510 const char *s = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00001511 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001512#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001513 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001514#endif
1515
Christian Heimes44720832008-05-26 13:01:01 +00001516 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1517 return NULL;
1518 if (maxsplit < 0)
1519 maxsplit = PY_SSIZE_T_MAX;
1520 if (subobj == Py_None)
1521 return split_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001522 if (PyString_Check(subobj)) {
1523 sub = PyString_AS_STRING(subobj);
1524 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001525 }
1526#ifdef Py_USING_UNICODE
1527 else if (PyUnicode_Check(subobj))
1528 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1529#endif
1530 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1531 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001532
Christian Heimes44720832008-05-26 13:01:01 +00001533 if (n == 0) {
1534 PyErr_SetString(PyExc_ValueError, "empty separator");
1535 return NULL;
1536 }
1537 else if (n == 1)
1538 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001539
Christian Heimes44720832008-05-26 13:01:01 +00001540 list = PyList_New(PREALLOC_SIZE(maxsplit));
1541 if (list == NULL)
1542 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001543
1544#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001545 i = j = 0;
1546 while (maxsplit-- > 0) {
1547 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1548 if (pos < 0)
1549 break;
1550 j = i+pos;
1551 SPLIT_ADD(s, i, j);
1552 i = j + n;
1553 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001554#else
Christian Heimes44720832008-05-26 13:01:01 +00001555 i = j = 0;
1556 while ((j+n <= len) && (maxsplit-- > 0)) {
1557 for (; j+n <= len; j++) {
1558 if (Py_STRING_MATCH(s, j, sub, n)) {
1559 SPLIT_ADD(s, i, j);
1560 i = j = j + n;
1561 break;
1562 }
1563 }
1564 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001565#endif
Christian Heimes44720832008-05-26 13:01:01 +00001566 SPLIT_ADD(s, i, len);
1567 FIX_PREALLOC_SIZE(list);
1568 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001569
Christian Heimes44720832008-05-26 13:01:01 +00001570 onError:
1571 Py_DECREF(list);
1572 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001573}
1574
1575PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001576"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001577\n\
Christian Heimes44720832008-05-26 13:01:01 +00001578Searches for the separator sep in S, and returns the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001579the separator itself, and the part after it. If the separator is not\n\
Christian Heimes44720832008-05-26 13:01:01 +00001580found, returns S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001581
1582static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001583string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001584{
Christian Heimes44720832008-05-26 13:01:01 +00001585 const char *sep;
1586 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001587
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001588 if (PyString_Check(sep_obj)) {
1589 sep = PyString_AS_STRING(sep_obj);
1590 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001591 }
1592#ifdef Py_USING_UNICODE
1593 else if (PyUnicode_Check(sep_obj))
1594 return PyUnicode_Partition((PyObject *) self, sep_obj);
1595#endif
1596 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1597 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001598
Christian Heimes44720832008-05-26 13:01:01 +00001599 return stringlib_partition(
1600 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001601 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001602 sep_obj, sep, sep_len
1603 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001604}
1605
1606PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001607"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001608\n\
Christian Heimes44720832008-05-26 13:01:01 +00001609Searches for the separator sep in S, starting at the end of S, and returns\n\
1610the part before it, the separator itself, and the part after it. If the\n\
1611separator is not found, returns two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001612
1613static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001614string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001615{
Christian Heimes44720832008-05-26 13:01:01 +00001616 const char *sep;
1617 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001618
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001619 if (PyString_Check(sep_obj)) {
1620 sep = PyString_AS_STRING(sep_obj);
1621 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001622 }
1623#ifdef Py_USING_UNICODE
1624 else if (PyUnicode_Check(sep_obj))
1625 return PyUnicode_Partition((PyObject *) self, sep_obj);
1626#endif
1627 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1628 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001629
Christian Heimes44720832008-05-26 13:01:01 +00001630 return stringlib_rpartition(
1631 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001632 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001633 sep_obj, sep, sep_len
1634 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001635}
1636
1637Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001638rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001639{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001640 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001641 Py_ssize_t i, j, count=0;
1642 PyObject *str;
1643 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001644
Christian Heimes44720832008-05-26 13:01:01 +00001645 if (list == NULL)
1646 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001647
Christian Heimes44720832008-05-26 13:01:01 +00001648 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001649
Christian Heimes44720832008-05-26 13:01:01 +00001650 while (maxsplit-- > 0) {
1651 RSKIP_SPACE(s, i);
1652 if (i<0) break;
1653 j = i; i--;
1654 RSKIP_NONSPACE(s, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001655 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001656 /* No whitespace in self, so just use it as list[0] */
1657 Py_INCREF(self);
1658 PyList_SET_ITEM(list, 0, (PyObject *)self);
1659 count++;
1660 break;
1661 }
1662 SPLIT_ADD(s, i + 1, j + 1);
1663 }
1664 if (i >= 0) {
1665 /* Only occurs when maxsplit was reached */
1666 /* Skip any remaining whitespace and copy to beginning of string */
1667 RSKIP_SPACE(s, i);
1668 if (i >= 0)
1669 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001670
Christian Heimes44720832008-05-26 13:01:01 +00001671 }
1672 FIX_PREALLOC_SIZE(list);
1673 if (PyList_Reverse(list) < 0)
1674 goto onError;
1675 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001676 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001677 Py_DECREF(list);
1678 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001679}
1680
1681Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001682rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001683{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001684 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001685 register Py_ssize_t i, j, count=0;
1686 PyObject *str;
1687 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001688
Christian Heimes44720832008-05-26 13:01:01 +00001689 if (list == NULL)
1690 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001691
Christian Heimes44720832008-05-26 13:01:01 +00001692 i = j = len - 1;
1693 while ((i >= 0) && (maxcount-- > 0)) {
1694 for (; i >= 0; i--) {
1695 if (s[i] == ch) {
1696 SPLIT_ADD(s, i + 1, j + 1);
1697 j = i = i - 1;
1698 break;
1699 }
1700 }
1701 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001702 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001703 /* ch not in self, so just use self as list[0] */
1704 Py_INCREF(self);
1705 PyList_SET_ITEM(list, 0, (PyObject *)self);
1706 count++;
1707 }
1708 else if (j >= -1) {
1709 SPLIT_ADD(s, 0, j + 1);
1710 }
1711 FIX_PREALLOC_SIZE(list);
1712 if (PyList_Reverse(list) < 0)
1713 goto onError;
1714 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001715
Christian Heimes44720832008-05-26 13:01:01 +00001716 onError:
1717 Py_DECREF(list);
1718 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001719}
1720
1721PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001722"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001723\n\
Christian Heimes44720832008-05-26 13:01:01 +00001724Return a list of the words in the string S, using sep as the\n\
1725delimiter string, starting at the end of the string and working\n\
1726to the front. If maxsplit is given, at most maxsplit splits are\n\
1727done. If sep is not specified or is None, any whitespace string\n\
1728is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001729
1730static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001731string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001732{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001733 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001734 Py_ssize_t maxsplit = -1, count=0;
1735 const char *s, *sub;
1736 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001737
Christian Heimes44720832008-05-26 13:01:01 +00001738 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1739 return NULL;
1740 if (maxsplit < 0)
1741 maxsplit = PY_SSIZE_T_MAX;
1742 if (subobj == Py_None)
1743 return rsplit_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001744 if (PyString_Check(subobj)) {
1745 sub = PyString_AS_STRING(subobj);
1746 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001747 }
1748#ifdef Py_USING_UNICODE
1749 else if (PyUnicode_Check(subobj))
1750 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1751#endif
1752 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1753 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001754
Christian Heimes44720832008-05-26 13:01:01 +00001755 if (n == 0) {
1756 PyErr_SetString(PyExc_ValueError, "empty separator");
1757 return NULL;
1758 }
1759 else if (n == 1)
1760 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001761
Christian Heimes44720832008-05-26 13:01:01 +00001762 list = PyList_New(PREALLOC_SIZE(maxsplit));
1763 if (list == NULL)
1764 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001765
Christian Heimes44720832008-05-26 13:01:01 +00001766 j = len;
1767 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001768
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001769 s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001770 while ( (i >= 0) && (maxsplit-- > 0) ) {
1771 for (; i>=0; i--) {
1772 if (Py_STRING_MATCH(s, i, sub, n)) {
1773 SPLIT_ADD(s, i + n, j);
1774 j = i;
1775 i -= n;
1776 break;
1777 }
1778 }
1779 }
1780 SPLIT_ADD(s, 0, j);
1781 FIX_PREALLOC_SIZE(list);
1782 if (PyList_Reverse(list) < 0)
1783 goto onError;
1784 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001785
1786onError:
Christian Heimes44720832008-05-26 13:01:01 +00001787 Py_DECREF(list);
1788 return NULL;
1789}
1790
1791
1792PyDoc_STRVAR(join__doc__,
1793"S.join(sequence) -> string\n\
1794\n\
1795Return a string which is the concatenation of the strings in the\n\
1796sequence. The separator between elements is S.");
1797
1798static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001799string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001800{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001801 char *sep = PyString_AS_STRING(self);
1802 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001803 PyObject *res = NULL;
1804 char *p;
1805 Py_ssize_t seqlen = 0;
1806 size_t sz = 0;
1807 Py_ssize_t i;
1808 PyObject *seq, *item;
1809
1810 seq = PySequence_Fast(orig, "");
1811 if (seq == NULL) {
1812 return NULL;
1813 }
1814
1815 seqlen = PySequence_Size(seq);
1816 if (seqlen == 0) {
1817 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001818 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001819 }
1820 if (seqlen == 1) {
1821 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001822 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001823 Py_INCREF(item);
1824 Py_DECREF(seq);
1825 return item;
1826 }
1827 }
1828
1829 /* There are at least two things to join, or else we have a subclass
1830 * of the builtin types in the sequence.
1831 * Do a pre-pass to figure out the total amount of space we'll
1832 * need (sz), see whether any argument is absurd, and defer to
1833 * the Unicode join if appropriate.
1834 */
1835 for (i = 0; i < seqlen; i++) {
1836 const size_t old_sz = sz;
1837 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001838 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001839#ifdef Py_USING_UNICODE
1840 if (PyUnicode_Check(item)) {
1841 /* Defer to Unicode join.
1842 * CAUTION: There's no gurantee that the
1843 * original sequence can be iterated over
1844 * again, so we must pass seq here.
1845 */
1846 PyObject *result;
1847 result = PyUnicode_Join((PyObject *)self, seq);
1848 Py_DECREF(seq);
1849 return result;
1850 }
1851#endif
1852 PyErr_Format(PyExc_TypeError,
1853 "sequence item %zd: expected string,"
1854 " %.80s found",
1855 i, Py_TYPE(item)->tp_name);
1856 Py_DECREF(seq);
1857 return NULL;
1858 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001859 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001860 if (i != 0)
1861 sz += seplen;
1862 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1863 PyErr_SetString(PyExc_OverflowError,
1864 "join() result is too long for a Python string");
1865 Py_DECREF(seq);
1866 return NULL;
1867 }
1868 }
1869
1870 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001871 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001872 if (res == NULL) {
1873 Py_DECREF(seq);
1874 return NULL;
1875 }
1876
1877 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001878 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001879 for (i = 0; i < seqlen; ++i) {
1880 size_t n;
1881 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001882 n = PyString_GET_SIZE(item);
1883 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001884 p += n;
1885 if (i < seqlen - 1) {
1886 Py_MEMCPY(p, sep, seplen);
1887 p += seplen;
1888 }
1889 }
1890
1891 Py_DECREF(seq);
1892 return res;
1893}
1894
1895PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001896_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001897{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001898 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001899 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001900 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001901}
1902
1903Py_LOCAL_INLINE(void)
1904string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1905{
1906 if (*end > len)
1907 *end = len;
1908 else if (*end < 0)
1909 *end += len;
1910 if (*end < 0)
1911 *end = 0;
1912 if (*start < 0)
1913 *start += len;
1914 if (*start < 0)
1915 *start = 0;
1916}
1917
1918Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001919string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001920{
1921 PyObject *subobj;
1922 const char *sub;
1923 Py_ssize_t sub_len;
1924 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1925 PyObject *obj_start=Py_None, *obj_end=Py_None;
1926
1927 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1928 &obj_start, &obj_end))
1929 return -2;
1930 /* To support None in "start" and "end" arguments, meaning
1931 the same as if they were not passed.
1932 */
1933 if (obj_start != Py_None)
1934 if (!_PyEval_SliceIndex(obj_start, &start))
1935 return -2;
1936 if (obj_end != Py_None)
1937 if (!_PyEval_SliceIndex(obj_end, &end))
1938 return -2;
1939
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001940 if (PyString_Check(subobj)) {
1941 sub = PyString_AS_STRING(subobj);
1942 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001943 }
1944#ifdef Py_USING_UNICODE
1945 else if (PyUnicode_Check(subobj))
1946 return PyUnicode_Find(
1947 (PyObject *)self, subobj, start, end, dir);
1948#endif
1949 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1950 /* XXX - the "expected a character buffer object" is pretty
1951 confusing for a non-expert. remap to something else ? */
1952 return -2;
1953
1954 if (dir > 0)
1955 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001956 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001957 sub, sub_len, start, end);
1958 else
1959 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001960 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001961 sub, sub_len, start, end);
1962}
1963
1964
1965PyDoc_STRVAR(find__doc__,
1966"S.find(sub [,start [,end]]) -> int\n\
1967\n\
1968Return the lowest index in S where substring sub is found,\n\
1969such that sub is contained within s[start:end]. Optional\n\
1970arguments start and end are interpreted as in slice notation.\n\
1971\n\
1972Return -1 on failure.");
1973
1974static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001975string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001976{
1977 Py_ssize_t result = string_find_internal(self, args, +1);
1978 if (result == -2)
1979 return NULL;
1980 return PyInt_FromSsize_t(result);
1981}
1982
1983
1984PyDoc_STRVAR(index__doc__,
1985"S.index(sub [,start [,end]]) -> int\n\
1986\n\
1987Like S.find() but raise ValueError when the substring is not found.");
1988
1989static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001990string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001991{
1992 Py_ssize_t result = string_find_internal(self, args, +1);
1993 if (result == -2)
1994 return NULL;
1995 if (result == -1) {
1996 PyErr_SetString(PyExc_ValueError,
1997 "substring not found");
1998 return NULL;
1999 }
2000 return PyInt_FromSsize_t(result);
2001}
2002
2003
2004PyDoc_STRVAR(rfind__doc__,
2005"S.rfind(sub [,start [,end]]) -> int\n\
2006\n\
2007Return the highest index in S where substring sub is found,\n\
2008such that sub is contained within s[start:end]. Optional\n\
2009arguments start and end are interpreted as in slice notation.\n\
2010\n\
2011Return -1 on failure.");
2012
2013static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002014string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002015{
2016 Py_ssize_t result = string_find_internal(self, args, -1);
2017 if (result == -2)
2018 return NULL;
2019 return PyInt_FromSsize_t(result);
2020}
2021
2022
2023PyDoc_STRVAR(rindex__doc__,
2024"S.rindex(sub [,start [,end]]) -> int\n\
2025\n\
2026Like S.rfind() but raise ValueError when the substring is not found.");
2027
2028static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002029string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002030{
2031 Py_ssize_t result = string_find_internal(self, args, -1);
2032 if (result == -2)
2033 return NULL;
2034 if (result == -1) {
2035 PyErr_SetString(PyExc_ValueError,
2036 "substring not found");
2037 return NULL;
2038 }
2039 return PyInt_FromSsize_t(result);
2040}
2041
2042
2043Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002044do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002045{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002046 char *s = PyString_AS_STRING(self);
2047 Py_ssize_t len = PyString_GET_SIZE(self);
2048 char *sep = PyString_AS_STRING(sepobj);
2049 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00002050 Py_ssize_t i, j;
2051
2052 i = 0;
2053 if (striptype != RIGHTSTRIP) {
2054 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2055 i++;
2056 }
2057 }
2058
2059 j = len;
2060 if (striptype != LEFTSTRIP) {
2061 do {
2062 j--;
2063 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2064 j++;
2065 }
2066
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002067 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002068 Py_INCREF(self);
2069 return (PyObject*)self;
2070 }
2071 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002072 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002073}
2074
2075
2076Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002077do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002078{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002079 char *s = PyString_AS_STRING(self);
2080 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002081
2082 i = 0;
2083 if (striptype != RIGHTSTRIP) {
2084 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2085 i++;
2086 }
2087 }
2088
2089 j = len;
2090 if (striptype != LEFTSTRIP) {
2091 do {
2092 j--;
2093 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2094 j++;
2095 }
2096
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002097 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002098 Py_INCREF(self);
2099 return (PyObject*)self;
2100 }
2101 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002102 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002103}
2104
2105
2106Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002107do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002108{
2109 PyObject *sep = NULL;
2110
2111 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2112 return NULL;
2113
2114 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002115 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00002116 return do_xstrip(self, striptype, sep);
2117#ifdef Py_USING_UNICODE
2118 else if (PyUnicode_Check(sep)) {
2119 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2120 PyObject *res;
2121 if (uniself==NULL)
2122 return NULL;
2123 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2124 striptype, sep);
2125 Py_DECREF(uniself);
2126 return res;
2127 }
2128#endif
2129 PyErr_Format(PyExc_TypeError,
2130#ifdef Py_USING_UNICODE
2131 "%s arg must be None, str or unicode",
2132#else
2133 "%s arg must be None or str",
2134#endif
2135 STRIPNAME(striptype));
2136 return NULL;
2137 }
2138
2139 return do_strip(self, striptype);
2140}
2141
2142
2143PyDoc_STRVAR(strip__doc__,
2144"S.strip([chars]) -> string or unicode\n\
2145\n\
2146Return a copy of the string S with leading and trailing\n\
2147whitespace removed.\n\
2148If chars is given and not None, remove characters in chars instead.\n\
2149If chars is unicode, S will be converted to unicode before stripping");
2150
2151static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002152string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002153{
2154 if (PyTuple_GET_SIZE(args) == 0)
2155 return do_strip(self, BOTHSTRIP); /* Common case */
2156 else
2157 return do_argstrip(self, BOTHSTRIP, args);
2158}
2159
2160
2161PyDoc_STRVAR(lstrip__doc__,
2162"S.lstrip([chars]) -> string or unicode\n\
2163\n\
2164Return a copy of the string S with leading whitespace removed.\n\
2165If chars is given and not None, remove characters in chars instead.\n\
2166If chars is unicode, S will be converted to unicode before stripping");
2167
2168static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002169string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002170{
2171 if (PyTuple_GET_SIZE(args) == 0)
2172 return do_strip(self, LEFTSTRIP); /* Common case */
2173 else
2174 return do_argstrip(self, LEFTSTRIP, args);
2175}
2176
2177
2178PyDoc_STRVAR(rstrip__doc__,
2179"S.rstrip([chars]) -> string or unicode\n\
2180\n\
2181Return a copy of the string S with trailing whitespace removed.\n\
2182If chars is given and not None, remove characters in chars instead.\n\
2183If chars is unicode, S will be converted to unicode before stripping");
2184
2185static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002186string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002187{
2188 if (PyTuple_GET_SIZE(args) == 0)
2189 return do_strip(self, RIGHTSTRIP); /* Common case */
2190 else
2191 return do_argstrip(self, RIGHTSTRIP, args);
2192}
2193
2194
2195PyDoc_STRVAR(lower__doc__,
2196"S.lower() -> string\n\
2197\n\
2198Return a copy of the string S converted to lowercase.");
2199
2200/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2201#ifndef _tolower
2202#define _tolower tolower
2203#endif
2204
2205static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002206string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002207{
2208 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002209 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002210 PyObject *newobj;
2211
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002212 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002213 if (!newobj)
2214 return NULL;
2215
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002216 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002217
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002218 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002219
2220 for (i = 0; i < n; i++) {
2221 int c = Py_CHARMASK(s[i]);
2222 if (isupper(c))
2223 s[i] = _tolower(c);
2224 }
2225
2226 return newobj;
2227}
2228
2229PyDoc_STRVAR(upper__doc__,
2230"S.upper() -> string\n\
2231\n\
2232Return a copy of the string S converted to uppercase.");
2233
2234#ifndef _toupper
2235#define _toupper toupper
2236#endif
2237
2238static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002239string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002240{
2241 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002242 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002243 PyObject *newobj;
2244
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002245 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002246 if (!newobj)
2247 return NULL;
2248
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002249 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002250
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002251 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002252
2253 for (i = 0; i < n; i++) {
2254 int c = Py_CHARMASK(s[i]);
2255 if (islower(c))
2256 s[i] = _toupper(c);
2257 }
2258
2259 return newobj;
2260}
2261
2262PyDoc_STRVAR(title__doc__,
2263"S.title() -> string\n\
2264\n\
2265Return a titlecased version of S, i.e. words start with uppercase\n\
2266characters, all remaining cased characters have lowercase.");
2267
2268static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002269string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002270{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002271 char *s = PyString_AS_STRING(self), *s_new;
2272 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002273 int previous_is_cased = 0;
2274 PyObject *newobj;
2275
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002276 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002277 if (newobj == NULL)
2278 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002279 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002280 for (i = 0; i < n; i++) {
2281 int c = Py_CHARMASK(*s++);
2282 if (islower(c)) {
2283 if (!previous_is_cased)
2284 c = toupper(c);
2285 previous_is_cased = 1;
2286 } else if (isupper(c)) {
2287 if (previous_is_cased)
2288 c = tolower(c);
2289 previous_is_cased = 1;
2290 } else
2291 previous_is_cased = 0;
2292 *s_new++ = c;
2293 }
2294 return newobj;
2295}
2296
2297PyDoc_STRVAR(capitalize__doc__,
2298"S.capitalize() -> string\n\
2299\n\
2300Return a copy of the string S with only its first character\n\
2301capitalized.");
2302
2303static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002304string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002305{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002306 char *s = PyString_AS_STRING(self), *s_new;
2307 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002308 PyObject *newobj;
2309
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002310 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002311 if (newobj == NULL)
2312 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002313 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002314 if (0 < n) {
2315 int c = Py_CHARMASK(*s++);
2316 if (islower(c))
2317 *s_new = toupper(c);
2318 else
2319 *s_new = c;
2320 s_new++;
2321 }
2322 for (i = 1; i < n; i++) {
2323 int c = Py_CHARMASK(*s++);
2324 if (isupper(c))
2325 *s_new = tolower(c);
2326 else
2327 *s_new = c;
2328 s_new++;
2329 }
2330 return newobj;
2331}
2332
2333
2334PyDoc_STRVAR(count__doc__,
2335"S.count(sub[, start[, end]]) -> int\n\
2336\n\
2337Return the number of non-overlapping occurrences of substring sub in\n\
2338string S[start:end]. Optional arguments start and end are interpreted\n\
2339as in slice notation.");
2340
2341static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002342string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002343{
2344 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002345 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002346 Py_ssize_t sub_len;
2347 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2348
2349 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2350 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2351 return NULL;
2352
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002353 if (PyString_Check(sub_obj)) {
2354 sub = PyString_AS_STRING(sub_obj);
2355 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002356 }
2357#ifdef Py_USING_UNICODE
2358 else if (PyUnicode_Check(sub_obj)) {
2359 Py_ssize_t count;
2360 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2361 if (count == -1)
2362 return NULL;
2363 else
2364 return PyInt_FromSsize_t(count);
2365 }
2366#endif
2367 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2368 return NULL;
2369
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002370 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002371
2372 return PyInt_FromSsize_t(
2373 stringlib_count(str + start, end - start, sub, sub_len)
2374 );
2375}
2376
2377PyDoc_STRVAR(swapcase__doc__,
2378"S.swapcase() -> string\n\
2379\n\
2380Return a copy of the string S with uppercase characters\n\
2381converted to lowercase and vice versa.");
2382
2383static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002384string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002385{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002386 char *s = PyString_AS_STRING(self), *s_new;
2387 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002388 PyObject *newobj;
2389
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002390 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002391 if (newobj == NULL)
2392 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002393 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002394 for (i = 0; i < n; i++) {
2395 int c = Py_CHARMASK(*s++);
2396 if (islower(c)) {
2397 *s_new = toupper(c);
2398 }
2399 else if (isupper(c)) {
2400 *s_new = tolower(c);
2401 }
2402 else
2403 *s_new = c;
2404 s_new++;
2405 }
2406 return newobj;
2407}
2408
2409
2410PyDoc_STRVAR(translate__doc__,
2411"S.translate(table [,deletechars]) -> string\n\
2412\n\
2413Return a copy of the string S, where all characters occurring\n\
2414in the optional argument deletechars are removed, and the\n\
2415remaining characters have been mapped through the given\n\
2416translation table, which must be a string of length 256.");
2417
2418static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002419string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002420{
2421 register char *input, *output;
2422 const char *table;
2423 register Py_ssize_t i, c, changed = 0;
2424 PyObject *input_obj = (PyObject*)self;
2425 const char *output_start, *del_table=NULL;
2426 Py_ssize_t inlen, tablen, dellen = 0;
2427 PyObject *result;
2428 int trans_table[256];
2429 PyObject *tableobj, *delobj = NULL;
2430
2431 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2432 &tableobj, &delobj))
2433 return NULL;
2434
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002435 if (PyString_Check(tableobj)) {
2436 table = PyString_AS_STRING(tableobj);
2437 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002438 }
2439 else if (tableobj == Py_None) {
2440 table = NULL;
2441 tablen = 256;
2442 }
2443#ifdef Py_USING_UNICODE
2444 else if (PyUnicode_Check(tableobj)) {
2445 /* Unicode .translate() does not support the deletechars
2446 parameter; instead a mapping to None will cause characters
2447 to be deleted. */
2448 if (delobj != NULL) {
2449 PyErr_SetString(PyExc_TypeError,
2450 "deletions are implemented differently for unicode");
2451 return NULL;
2452 }
2453 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2454 }
2455#endif
2456 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2457 return NULL;
2458
2459 if (tablen != 256) {
2460 PyErr_SetString(PyExc_ValueError,
2461 "translation table must be 256 characters long");
2462 return NULL;
2463 }
2464
2465 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002466 if (PyString_Check(delobj)) {
2467 del_table = PyString_AS_STRING(delobj);
2468 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002469 }
2470#ifdef Py_USING_UNICODE
2471 else if (PyUnicode_Check(delobj)) {
2472 PyErr_SetString(PyExc_TypeError,
2473 "deletions are implemented differently for unicode");
2474 return NULL;
2475 }
2476#endif
2477 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2478 return NULL;
2479 }
2480 else {
2481 del_table = NULL;
2482 dellen = 0;
2483 }
2484
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002485 inlen = PyString_GET_SIZE(input_obj);
2486 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002487 if (result == NULL)
2488 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002489 output_start = output = PyString_AsString(result);
2490 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002491
2492 if (dellen == 0 && table != NULL) {
2493 /* If no deletions are required, use faster code */
2494 for (i = inlen; --i >= 0; ) {
2495 c = Py_CHARMASK(*input++);
2496 if (Py_CHARMASK((*output++ = table[c])) != c)
2497 changed = 1;
2498 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002499 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002500 return result;
2501 Py_DECREF(result);
2502 Py_INCREF(input_obj);
2503 return input_obj;
2504 }
2505
2506 if (table == NULL) {
2507 for (i = 0; i < 256; i++)
2508 trans_table[i] = Py_CHARMASK(i);
2509 } else {
2510 for (i = 0; i < 256; i++)
2511 trans_table[i] = Py_CHARMASK(table[i]);
2512 }
2513
2514 for (i = 0; i < dellen; i++)
2515 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2516
2517 for (i = inlen; --i >= 0; ) {
2518 c = Py_CHARMASK(*input++);
2519 if (trans_table[c] != -1)
2520 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2521 continue;
2522 changed = 1;
2523 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002524 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002525 Py_DECREF(result);
2526 Py_INCREF(input_obj);
2527 return input_obj;
2528 }
2529 /* Fix the size of the resulting string */
2530 if (inlen > 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002531 _PyString_Resize(&result, output - output_start);
Christian Heimes44720832008-05-26 13:01:01 +00002532 return result;
2533}
2534
2535
2536#define FORWARD 1
2537#define REVERSE -1
2538
2539/* find and count characters and substrings */
2540
2541#define findchar(target, target_len, c) \
2542 ((char *)memchr((const void *)(target), c, target_len))
2543
2544/* String ops must return a string. */
2545/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002546Py_LOCAL(PyStringObject *)
2547return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002548{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002549 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002550 Py_INCREF(self);
2551 return self;
2552 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002553 return (PyStringObject *)PyString_FromStringAndSize(
2554 PyString_AS_STRING(self),
2555 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002556}
2557
2558Py_LOCAL_INLINE(Py_ssize_t)
2559countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2560{
2561 Py_ssize_t count=0;
2562 const char *start=target;
2563 const char *end=target+target_len;
2564
2565 while ( (start=findchar(start, end-start, c)) != NULL ) {
2566 count++;
2567 if (count >= maxcount)
2568 break;
2569 start += 1;
2570 }
2571 return count;
2572}
2573
2574Py_LOCAL(Py_ssize_t)
2575findstring(const char *target, Py_ssize_t target_len,
2576 const char *pattern, Py_ssize_t pattern_len,
2577 Py_ssize_t start,
2578 Py_ssize_t end,
2579 int direction)
2580{
2581 if (start < 0) {
2582 start += target_len;
2583 if (start < 0)
2584 start = 0;
2585 }
2586 if (end > target_len) {
2587 end = target_len;
2588 } else if (end < 0) {
2589 end += target_len;
2590 if (end < 0)
2591 end = 0;
2592 }
2593
2594 /* zero-length substrings always match at the first attempt */
2595 if (pattern_len == 0)
2596 return (direction > 0) ? start : end;
2597
2598 end -= pattern_len;
2599
2600 if (direction < 0) {
2601 for (; end >= start; end--)
2602 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2603 return end;
2604 } else {
2605 for (; start <= end; start++)
2606 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2607 return start;
2608 }
2609 return -1;
2610}
2611
2612Py_LOCAL_INLINE(Py_ssize_t)
2613countstring(const char *target, Py_ssize_t target_len,
2614 const char *pattern, Py_ssize_t pattern_len,
2615 Py_ssize_t start,
2616 Py_ssize_t end,
2617 int direction, Py_ssize_t maxcount)
2618{
2619 Py_ssize_t count=0;
2620
2621 if (start < 0) {
2622 start += target_len;
2623 if (start < 0)
2624 start = 0;
2625 }
2626 if (end > target_len) {
2627 end = target_len;
2628 } else if (end < 0) {
2629 end += target_len;
2630 if (end < 0)
2631 end = 0;
2632 }
2633
2634 /* zero-length substrings match everywhere */
2635 if (pattern_len == 0 || maxcount == 0) {
2636 if (target_len+1 < maxcount)
2637 return target_len+1;
2638 return maxcount;
2639 }
2640
2641 end -= pattern_len;
2642 if (direction < 0) {
2643 for (; (end >= start); end--)
2644 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2645 count++;
2646 if (--maxcount <= 0) break;
2647 end -= pattern_len-1;
2648 }
2649 } else {
2650 for (; (start <= end); start++)
2651 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2652 count++;
2653 if (--maxcount <= 0)
2654 break;
2655 start += pattern_len-1;
2656 }
2657 }
2658 return count;
2659}
2660
2661
2662/* Algorithms for different cases of string replacement */
2663
2664/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002665Py_LOCAL(PyStringObject *)
2666replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002667 const char *to_s, Py_ssize_t to_len,
2668 Py_ssize_t maxcount)
2669{
2670 char *self_s, *result_s;
2671 Py_ssize_t self_len, result_len;
2672 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002673 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002674
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002675 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002676
2677 /* 1 at the end plus 1 after every character */
2678 count = self_len+1;
2679 if (maxcount < count)
2680 count = maxcount;
2681
2682 /* Check for overflow */
2683 /* result_len = count * to_len + self_len; */
2684 product = count * to_len;
2685 if (product / to_len != count) {
2686 PyErr_SetString(PyExc_OverflowError,
2687 "replace string is too long");
2688 return NULL;
2689 }
2690 result_len = product + self_len;
2691 if (result_len < 0) {
2692 PyErr_SetString(PyExc_OverflowError,
2693 "replace string is too long");
2694 return NULL;
2695 }
2696
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002697 if (! (result = (PyStringObject *)
2698 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002699 return NULL;
2700
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002701 self_s = PyString_AS_STRING(self);
2702 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002703
2704 /* TODO: special case single character, which doesn't need memcpy */
2705
2706 /* Lay the first one down (guaranteed this will occur) */
2707 Py_MEMCPY(result_s, to_s, to_len);
2708 result_s += to_len;
2709 count -= 1;
2710
2711 for (i=0; i<count; i++) {
2712 *result_s++ = *self_s++;
2713 Py_MEMCPY(result_s, to_s, to_len);
2714 result_s += to_len;
2715 }
2716
2717 /* Copy the rest of the original string */
2718 Py_MEMCPY(result_s, self_s, self_len-i);
2719
2720 return result;
2721}
2722
2723/* Special case for deleting a single character */
2724/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002725Py_LOCAL(PyStringObject *)
2726replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002727 char from_c, Py_ssize_t maxcount)
2728{
2729 char *self_s, *result_s;
2730 char *start, *next, *end;
2731 Py_ssize_t self_len, result_len;
2732 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002733 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002734
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002735 self_len = PyString_GET_SIZE(self);
2736 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002737
2738 count = countchar(self_s, self_len, from_c, maxcount);
2739 if (count == 0) {
2740 return return_self(self);
2741 }
2742
2743 result_len = self_len - count; /* from_len == 1 */
2744 assert(result_len>=0);
2745
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002746 if ( (result = (PyStringObject *)
2747 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002748 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002749 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002750
2751 start = self_s;
2752 end = self_s + self_len;
2753 while (count-- > 0) {
2754 next = findchar(start, end-start, from_c);
2755 if (next == NULL)
2756 break;
2757 Py_MEMCPY(result_s, start, next-start);
2758 result_s += (next-start);
2759 start = next+1;
2760 }
2761 Py_MEMCPY(result_s, start, end-start);
2762
2763 return result;
2764}
2765
2766/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2767
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002768Py_LOCAL(PyStringObject *)
2769replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002770 const char *from_s, Py_ssize_t from_len,
2771 Py_ssize_t maxcount) {
2772 char *self_s, *result_s;
2773 char *start, *next, *end;
2774 Py_ssize_t self_len, result_len;
2775 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002776 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002777
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002778 self_len = PyString_GET_SIZE(self);
2779 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002780
2781 count = countstring(self_s, self_len,
2782 from_s, from_len,
2783 0, self_len, 1,
2784 maxcount);
2785
2786 if (count == 0) {
2787 /* no matches */
2788 return return_self(self);
2789 }
2790
2791 result_len = self_len - (count * from_len);
2792 assert (result_len>=0);
2793
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002794 if ( (result = (PyStringObject *)
2795 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002796 return NULL;
2797
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002798 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002799
2800 start = self_s;
2801 end = self_s + self_len;
2802 while (count-- > 0) {
2803 offset = findstring(start, end-start,
2804 from_s, from_len,
2805 0, end-start, FORWARD);
2806 if (offset == -1)
2807 break;
2808 next = start + offset;
2809
2810 Py_MEMCPY(result_s, start, next-start);
2811
2812 result_s += (next-start);
2813 start = next+from_len;
2814 }
2815 Py_MEMCPY(result_s, start, end-start);
2816 return result;
2817}
2818
2819/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002820Py_LOCAL(PyStringObject *)
2821replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002822 char from_c, char to_c,
2823 Py_ssize_t maxcount)
2824{
2825 char *self_s, *result_s, *start, *end, *next;
2826 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002827 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002828
2829 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002830 self_s = PyString_AS_STRING(self);
2831 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002832
2833 next = findchar(self_s, self_len, from_c);
2834
2835 if (next == NULL) {
2836 /* No matches; return the original string */
2837 return return_self(self);
2838 }
2839
2840 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002841 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002842 if (result == NULL)
2843 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002844 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002845 Py_MEMCPY(result_s, self_s, self_len);
2846
2847 /* change everything in-place, starting with this one */
2848 start = result_s + (next-self_s);
2849 *start = to_c;
2850 start++;
2851 end = result_s + self_len;
2852
2853 while (--maxcount > 0) {
2854 next = findchar(start, end-start, from_c);
2855 if (next == NULL)
2856 break;
2857 *next = to_c;
2858 start = next+1;
2859 }
2860
2861 return result;
2862}
2863
2864/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002865Py_LOCAL(PyStringObject *)
2866replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002867 const char *from_s, Py_ssize_t from_len,
2868 const char *to_s, Py_ssize_t to_len,
2869 Py_ssize_t maxcount)
2870{
2871 char *result_s, *start, *end;
2872 char *self_s;
2873 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002874 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002875
2876 /* The result string will be the same size */
2877
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002878 self_s = PyString_AS_STRING(self);
2879 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002880
2881 offset = findstring(self_s, self_len,
2882 from_s, from_len,
2883 0, self_len, FORWARD);
2884 if (offset == -1) {
2885 /* No matches; return the original string */
2886 return return_self(self);
2887 }
2888
2889 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002890 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002891 if (result == NULL)
2892 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002893 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002894 Py_MEMCPY(result_s, self_s, self_len);
2895
2896 /* change everything in-place, starting with this one */
2897 start = result_s + offset;
2898 Py_MEMCPY(start, to_s, from_len);
2899 start += from_len;
2900 end = result_s + self_len;
2901
2902 while ( --maxcount > 0) {
2903 offset = findstring(start, end-start,
2904 from_s, from_len,
2905 0, end-start, FORWARD);
2906 if (offset==-1)
2907 break;
2908 Py_MEMCPY(start+offset, to_s, from_len);
2909 start += offset+from_len;
2910 }
2911
2912 return result;
2913}
2914
2915/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002916Py_LOCAL(PyStringObject *)
2917replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002918 char from_c,
2919 const char *to_s, Py_ssize_t to_len,
2920 Py_ssize_t maxcount)
2921{
2922 char *self_s, *result_s;
2923 char *start, *next, *end;
2924 Py_ssize_t self_len, result_len;
2925 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002926 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002927
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002928 self_s = PyString_AS_STRING(self);
2929 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002930
2931 count = countchar(self_s, self_len, from_c, maxcount);
2932 if (count == 0) {
2933 /* no matches, return unchanged */
2934 return return_self(self);
2935 }
2936
2937 /* use the difference between current and new, hence the "-1" */
2938 /* result_len = self_len + count * (to_len-1) */
2939 product = count * (to_len-1);
2940 if (product / (to_len-1) != count) {
2941 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2942 return NULL;
2943 }
2944 result_len = self_len + product;
2945 if (result_len < 0) {
2946 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2947 return NULL;
2948 }
2949
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002950 if ( (result = (PyStringObject *)
2951 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002952 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002953 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002954
2955 start = self_s;
2956 end = self_s + self_len;
2957 while (count-- > 0) {
2958 next = findchar(start, end-start, from_c);
2959 if (next == NULL)
2960 break;
2961
2962 if (next == start) {
2963 /* replace with the 'to' */
2964 Py_MEMCPY(result_s, to_s, to_len);
2965 result_s += to_len;
2966 start += 1;
2967 } else {
2968 /* copy the unchanged old then the 'to' */
2969 Py_MEMCPY(result_s, start, next-start);
2970 result_s += (next-start);
2971 Py_MEMCPY(result_s, to_s, to_len);
2972 result_s += to_len;
2973 start = next+1;
2974 }
2975 }
2976 /* Copy the remainder of the remaining string */
2977 Py_MEMCPY(result_s, start, end-start);
2978
2979 return result;
2980}
2981
2982/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002983Py_LOCAL(PyStringObject *)
2984replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002985 const char *from_s, Py_ssize_t from_len,
2986 const char *to_s, Py_ssize_t to_len,
2987 Py_ssize_t maxcount) {
2988 char *self_s, *result_s;
2989 char *start, *next, *end;
2990 Py_ssize_t self_len, result_len;
2991 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002992 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002993
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002994 self_s = PyString_AS_STRING(self);
2995 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002996
2997 count = countstring(self_s, self_len,
2998 from_s, from_len,
2999 0, self_len, FORWARD, maxcount);
3000 if (count == 0) {
3001 /* no matches, return unchanged */
3002 return return_self(self);
3003 }
3004
3005 /* Check for overflow */
3006 /* result_len = self_len + count * (to_len-from_len) */
3007 product = count * (to_len-from_len);
3008 if (product / (to_len-from_len) != count) {
3009 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3010 return NULL;
3011 }
3012 result_len = self_len + product;
3013 if (result_len < 0) {
3014 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3015 return NULL;
3016 }
3017
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003018 if ( (result = (PyStringObject *)
3019 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003020 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003021 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003022
3023 start = self_s;
3024 end = self_s + self_len;
3025 while (count-- > 0) {
3026 offset = findstring(start, end-start,
3027 from_s, from_len,
3028 0, end-start, FORWARD);
3029 if (offset == -1)
3030 break;
3031 next = start+offset;
3032 if (next == start) {
3033 /* replace with the 'to' */
3034 Py_MEMCPY(result_s, to_s, to_len);
3035 result_s += to_len;
3036 start += from_len;
3037 } else {
3038 /* copy the unchanged old then the 'to' */
3039 Py_MEMCPY(result_s, start, next-start);
3040 result_s += (next-start);
3041 Py_MEMCPY(result_s, to_s, to_len);
3042 result_s += to_len;
3043 start = next+from_len;
3044 }
3045 }
3046 /* Copy the remainder of the remaining string */
3047 Py_MEMCPY(result_s, start, end-start);
3048
3049 return result;
3050}
3051
3052
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003053Py_LOCAL(PyStringObject *)
3054replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003055 const char *from_s, Py_ssize_t from_len,
3056 const char *to_s, Py_ssize_t to_len,
3057 Py_ssize_t maxcount)
3058{
3059 if (maxcount < 0) {
3060 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003061 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003062 /* nothing to do; return the original string */
3063 return return_self(self);
3064 }
3065
3066 if (maxcount == 0 ||
3067 (from_len == 0 && to_len == 0)) {
3068 /* nothing to do; return the original string */
3069 return return_self(self);
3070 }
3071
3072 /* Handle zero-length special cases */
3073
3074 if (from_len == 0) {
3075 /* insert the 'to' string everywhere. */
3076 /* >>> "Python".replace("", ".") */
3077 /* '.P.y.t.h.o.n.' */
3078 return replace_interleave(self, to_s, to_len, maxcount);
3079 }
3080
3081 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3082 /* point for an empty self string to generate a non-empty string */
3083 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003084 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003085 return return_self(self);
3086 }
3087
3088 if (to_len == 0) {
3089 /* delete all occurances of 'from' string */
3090 if (from_len == 1) {
3091 return replace_delete_single_character(
3092 self, from_s[0], maxcount);
3093 } else {
3094 return replace_delete_substring(self, from_s, from_len, maxcount);
3095 }
3096 }
3097
3098 /* Handle special case where both strings have the same length */
3099
3100 if (from_len == to_len) {
3101 if (from_len == 1) {
3102 return replace_single_character_in_place(
3103 self,
3104 from_s[0],
3105 to_s[0],
3106 maxcount);
3107 } else {
3108 return replace_substring_in_place(
3109 self, from_s, from_len, to_s, to_len, maxcount);
3110 }
3111 }
3112
3113 /* Otherwise use the more generic algorithms */
3114 if (from_len == 1) {
3115 return replace_single_character(self, from_s[0],
3116 to_s, to_len, maxcount);
3117 } else {
3118 /* len('from')>=2, len('to')>=1 */
3119 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3120 }
3121}
3122
3123PyDoc_STRVAR(replace__doc__,
3124"S.replace (old, new[, count]) -> string\n\
3125\n\
3126Return a copy of string S with all occurrences of substring\n\
3127old replaced by new. If the optional argument count is\n\
3128given, only the first count occurrences are replaced.");
3129
3130static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003131string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003132{
3133 Py_ssize_t count = -1;
3134 PyObject *from, *to;
3135 const char *from_s, *to_s;
3136 Py_ssize_t from_len, to_len;
3137
3138 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3139 return NULL;
3140
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003141 if (PyString_Check(from)) {
3142 from_s = PyString_AS_STRING(from);
3143 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00003144 }
3145#ifdef Py_USING_UNICODE
3146 if (PyUnicode_Check(from))
3147 return PyUnicode_Replace((PyObject *)self,
3148 from, to, count);
3149#endif
3150 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3151 return NULL;
3152
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003153 if (PyString_Check(to)) {
3154 to_s = PyString_AS_STRING(to);
3155 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00003156 }
3157#ifdef Py_USING_UNICODE
3158 else if (PyUnicode_Check(to))
3159 return PyUnicode_Replace((PyObject *)self,
3160 from, to, count);
3161#endif
3162 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3163 return NULL;
3164
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003165 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00003166 from_s, from_len,
3167 to_s, to_len, count);
3168}
3169
3170/** End DALKE **/
3171
3172/* Matches the end (direction >= 0) or start (direction < 0) of self
3173 * against substr, using the start and end arguments. Returns
3174 * -1 on error, 0 if not found and 1 if found.
3175 */
3176Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003177_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00003178 Py_ssize_t end, int direction)
3179{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003180 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003181 Py_ssize_t slen;
3182 const char* sub;
3183 const char* str;
3184
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003185 if (PyString_Check(substr)) {
3186 sub = PyString_AS_STRING(substr);
3187 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00003188 }
3189#ifdef Py_USING_UNICODE
3190 else if (PyUnicode_Check(substr))
3191 return PyUnicode_Tailmatch((PyObject *)self,
3192 substr, start, end, direction);
3193#endif
3194 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3195 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003196 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003197
3198 string_adjust_indices(&start, &end, len);
3199
3200 if (direction < 0) {
3201 /* startswith */
3202 if (start+slen > len)
3203 return 0;
3204 } else {
3205 /* endswith */
3206 if (end-start < slen || start > len)
3207 return 0;
3208
3209 if (end-slen > start)
3210 start = end - slen;
3211 }
3212 if (end-start >= slen)
3213 return ! memcmp(str+start, sub, slen);
3214 return 0;
3215}
3216
3217
3218PyDoc_STRVAR(startswith__doc__,
3219"S.startswith(prefix[, start[, end]]) -> bool\n\
3220\n\
3221Return True if S starts with the specified prefix, False otherwise.\n\
3222With optional start, test S beginning at that position.\n\
3223With optional end, stop comparing S at that position.\n\
3224prefix can also be a tuple of strings to try.");
3225
3226static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003227string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003228{
3229 Py_ssize_t start = 0;
3230 Py_ssize_t end = PY_SSIZE_T_MAX;
3231 PyObject *subobj;
3232 int result;
3233
3234 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3235 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3236 return NULL;
3237 if (PyTuple_Check(subobj)) {
3238 Py_ssize_t i;
3239 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3240 result = _string_tailmatch(self,
3241 PyTuple_GET_ITEM(subobj, i),
3242 start, end, -1);
3243 if (result == -1)
3244 return NULL;
3245 else if (result) {
3246 Py_RETURN_TRUE;
3247 }
3248 }
3249 Py_RETURN_FALSE;
3250 }
3251 result = _string_tailmatch(self, subobj, start, end, -1);
3252 if (result == -1)
3253 return NULL;
3254 else
3255 return PyBool_FromLong(result);
3256}
3257
3258
3259PyDoc_STRVAR(endswith__doc__,
3260"S.endswith(suffix[, start[, end]]) -> bool\n\
3261\n\
3262Return True if S ends with the specified suffix, False otherwise.\n\
3263With optional start, test S beginning at that position.\n\
3264With optional end, stop comparing S at that position.\n\
3265suffix can also be a tuple of strings to try.");
3266
3267static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003268string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003269{
3270 Py_ssize_t start = 0;
3271 Py_ssize_t end = PY_SSIZE_T_MAX;
3272 PyObject *subobj;
3273 int result;
3274
3275 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3276 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3277 return NULL;
3278 if (PyTuple_Check(subobj)) {
3279 Py_ssize_t i;
3280 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3281 result = _string_tailmatch(self,
3282 PyTuple_GET_ITEM(subobj, i),
3283 start, end, +1);
3284 if (result == -1)
3285 return NULL;
3286 else if (result) {
3287 Py_RETURN_TRUE;
3288 }
3289 }
3290 Py_RETURN_FALSE;
3291 }
3292 result = _string_tailmatch(self, subobj, start, end, +1);
3293 if (result == -1)
3294 return NULL;
3295 else
3296 return PyBool_FromLong(result);
3297}
3298
3299
3300PyDoc_STRVAR(encode__doc__,
3301"S.encode([encoding[,errors]]) -> object\n\
3302\n\
3303Encodes S using the codec registered for encoding. encoding defaults\n\
3304to the default encoding. errors may be given to set a different error\n\
3305handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3306a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3307'xmlcharrefreplace' as well as any other name registered with\n\
3308codecs.register_error that is able to handle UnicodeEncodeErrors.");
3309
3310static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003311string_encode(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003312{
3313 char *encoding = NULL;
3314 char *errors = NULL;
3315 PyObject *v;
3316
3317 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3318 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003319 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003320 if (v == NULL)
3321 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003322 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003323 PyErr_Format(PyExc_TypeError,
3324 "encoder did not return a string/unicode object "
3325 "(type=%.400s)",
3326 Py_TYPE(v)->tp_name);
3327 Py_DECREF(v);
3328 return NULL;
3329 }
3330 return v;
3331
3332 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003333 return NULL;
3334}
3335
Christian Heimes44720832008-05-26 13:01:01 +00003336
3337PyDoc_STRVAR(decode__doc__,
3338"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003339\n\
Christian Heimes44720832008-05-26 13:01:01 +00003340Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003341to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003342handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3343a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3344as well as any other name registerd with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003345able to handle UnicodeDecodeErrors.");
3346
3347static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003348string_decode(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003349{
Christian Heimes44720832008-05-26 13:01:01 +00003350 char *encoding = NULL;
3351 char *errors = NULL;
3352 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003353
3354 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3355 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003356 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003357 if (v == NULL)
3358 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003359 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003360 PyErr_Format(PyExc_TypeError,
3361 "decoder did not return a string/unicode object "
3362 "(type=%.400s)",
3363 Py_TYPE(v)->tp_name);
3364 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003365 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003366 }
3367 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003368
Christian Heimes44720832008-05-26 13:01:01 +00003369 onError:
3370 return NULL;
3371}
3372
3373
3374PyDoc_STRVAR(expandtabs__doc__,
3375"S.expandtabs([tabsize]) -> string\n\
3376\n\
3377Return a copy of S where all tab characters are expanded using spaces.\n\
3378If tabsize is not given, a tab size of 8 characters is assumed.");
3379
3380static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003381string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003382{
3383 const char *e, *p, *qe;
3384 char *q;
3385 Py_ssize_t i, j, incr;
3386 PyObject *u;
3387 int tabsize = 8;
3388
3389 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3390 return NULL;
3391
3392 /* First pass: determine size of output string */
3393 i = 0; /* chars up to and including most recent \n or \r */
3394 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003395 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3396 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003397 if (*p == '\t') {
3398 if (tabsize > 0) {
3399 incr = tabsize - (j % tabsize);
3400 if (j > PY_SSIZE_T_MAX - incr)
3401 goto overflow1;
3402 j += incr;
3403 }
3404 }
3405 else {
3406 if (j > PY_SSIZE_T_MAX - 1)
3407 goto overflow1;
3408 j++;
3409 if (*p == '\n' || *p == '\r') {
3410 if (i > PY_SSIZE_T_MAX - j)
3411 goto overflow1;
3412 i += j;
3413 j = 0;
3414 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003415 }
Christian Heimes44720832008-05-26 13:01:01 +00003416
3417 if (i > PY_SSIZE_T_MAX - j)
3418 goto overflow1;
3419
3420 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003421 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003422 if (!u)
3423 return NULL;
3424
3425 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003426 q = PyString_AS_STRING(u); /* next output char */
3427 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003428
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003429 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003430 if (*p == '\t') {
3431 if (tabsize > 0) {
3432 i = tabsize - (j % tabsize);
3433 j += i;
3434 while (i--) {
3435 if (q >= qe)
3436 goto overflow2;
3437 *q++ = ' ';
3438 }
3439 }
3440 }
3441 else {
3442 if (q >= qe)
3443 goto overflow2;
3444 *q++ = *p;
3445 j++;
3446 if (*p == '\n' || *p == '\r')
3447 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003448 }
Christian Heimes44720832008-05-26 13:01:01 +00003449
3450 return u;
3451
3452 overflow2:
3453 Py_DECREF(u);
3454 overflow1:
3455 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3456 return NULL;
3457}
3458
3459Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003460pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003461{
3462 PyObject *u;
3463
3464 if (left < 0)
3465 left = 0;
3466 if (right < 0)
3467 right = 0;
3468
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003469 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003470 Py_INCREF(self);
3471 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003472 }
3473
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003474 u = PyString_FromStringAndSize(NULL,
3475 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003476 if (u) {
3477 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003478 memset(PyString_AS_STRING(u), fill, left);
3479 Py_MEMCPY(PyString_AS_STRING(u) + left,
3480 PyString_AS_STRING(self),
3481 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003482 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003483 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003484 fill, right);
3485 }
3486
3487 return u;
3488}
3489
3490PyDoc_STRVAR(ljust__doc__,
3491"S.ljust(width[, fillchar]) -> string\n"
3492"\n"
3493"Return S left justified in a string of length width. Padding is\n"
3494"done using the specified fill character (default is a space).");
3495
3496static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003497string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003498{
3499 Py_ssize_t width;
3500 char fillchar = ' ';
3501
3502 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3503 return NULL;
3504
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003505 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003506 Py_INCREF(self);
3507 return (PyObject*) self;
3508 }
3509
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003510 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003511}
3512
3513
3514PyDoc_STRVAR(rjust__doc__,
3515"S.rjust(width[, fillchar]) -> string\n"
3516"\n"
3517"Return S right justified in a string of length width. Padding is\n"
3518"done using the specified fill character (default is a space)");
3519
3520static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003521string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003522{
3523 Py_ssize_t width;
3524 char fillchar = ' ';
3525
3526 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3527 return NULL;
3528
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003529 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003530 Py_INCREF(self);
3531 return (PyObject*) self;
3532 }
3533
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003534 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003535}
3536
3537
3538PyDoc_STRVAR(center__doc__,
3539"S.center(width[, fillchar]) -> string\n"
3540"\n"
3541"Return S centered in a string of length width. Padding is\n"
3542"done using the specified fill character (default is a space)");
3543
3544static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003545string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003546{
3547 Py_ssize_t marg, left;
3548 Py_ssize_t width;
3549 char fillchar = ' ';
3550
3551 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3552 return NULL;
3553
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003554 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003555 Py_INCREF(self);
3556 return (PyObject*) self;
3557 }
3558
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003559 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003560 left = marg / 2 + (marg & width & 1);
3561
3562 return pad(self, left, marg - left, fillchar);
3563}
3564
3565PyDoc_STRVAR(zfill__doc__,
3566"S.zfill(width) -> string\n"
3567"\n"
3568"Pad a numeric string S with zeros on the left, to fill a field\n"
3569"of the specified width. The string S is never truncated.");
3570
3571static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003572string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003573{
3574 Py_ssize_t fill;
3575 PyObject *s;
3576 char *p;
3577 Py_ssize_t width;
3578
3579 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3580 return NULL;
3581
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003582 if (PyString_GET_SIZE(self) >= width) {
3583 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003584 Py_INCREF(self);
3585 return (PyObject*) self;
3586 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003587 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003588 return PyString_FromStringAndSize(
3589 PyString_AS_STRING(self),
3590 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003591 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003592 }
3593
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003594 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003595
Christian Heimes44720832008-05-26 13:01:01 +00003596 s = pad(self, fill, 0, '0');
3597
3598 if (s == NULL)
3599 return NULL;
3600
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003601 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003602 if (p[fill] == '+' || p[fill] == '-') {
3603 /* move sign to beginning of string */
3604 p[0] = p[fill];
3605 p[fill] = '0';
3606 }
3607
3608 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003609}
3610
Christian Heimes44720832008-05-26 13:01:01 +00003611PyDoc_STRVAR(isspace__doc__,
3612"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003613\n\
Christian Heimes44720832008-05-26 13:01:01 +00003614Return True if all characters in S are whitespace\n\
3615and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003616
Christian Heimes44720832008-05-26 13:01:01 +00003617static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003618string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003619{
Christian Heimes44720832008-05-26 13:01:01 +00003620 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003621 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003622 register const unsigned char *e;
3623
3624 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003625 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003626 isspace(*p))
3627 return PyBool_FromLong(1);
3628
3629 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003630 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003631 return PyBool_FromLong(0);
3632
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003633 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003634 for (; p < e; p++) {
3635 if (!isspace(*p))
3636 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003637 }
Christian Heimes44720832008-05-26 13:01:01 +00003638 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003639}
3640
Christian Heimes44720832008-05-26 13:01:01 +00003641
3642PyDoc_STRVAR(isalpha__doc__,
3643"S.isalpha() -> bool\n\
3644\n\
3645Return True if all characters in S are alphabetic\n\
3646and there is at least one character in S, False otherwise.");
3647
3648static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003649string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003650{
Christian Heimes44720832008-05-26 13:01:01 +00003651 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003652 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003653 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003654
Christian Heimes44720832008-05-26 13:01:01 +00003655 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003656 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003657 isalpha(*p))
3658 return PyBool_FromLong(1);
3659
3660 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003661 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003662 return PyBool_FromLong(0);
3663
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003664 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003665 for (; p < e; p++) {
3666 if (!isalpha(*p))
3667 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003668 }
Christian Heimes44720832008-05-26 13:01:01 +00003669 return PyBool_FromLong(1);
3670}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003671
Christian Heimes44720832008-05-26 13:01:01 +00003672
3673PyDoc_STRVAR(isalnum__doc__,
3674"S.isalnum() -> bool\n\
3675\n\
3676Return True if all characters in S are alphanumeric\n\
3677and there is at least one character in S, False otherwise.");
3678
3679static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003680string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003681{
3682 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003683 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003684 register const unsigned char *e;
3685
3686 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003687 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003688 isalnum(*p))
3689 return PyBool_FromLong(1);
3690
3691 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003692 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003693 return PyBool_FromLong(0);
3694
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003695 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003696 for (; p < e; p++) {
3697 if (!isalnum(*p))
3698 return PyBool_FromLong(0);
3699 }
3700 return PyBool_FromLong(1);
3701}
3702
3703
3704PyDoc_STRVAR(isdigit__doc__,
3705"S.isdigit() -> bool\n\
3706\n\
3707Return True if all characters in S are digits\n\
3708and there is at least one character in S, False otherwise.");
3709
3710static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003711string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003712{
3713 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003714 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003715 register const unsigned char *e;
3716
3717 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003718 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003719 isdigit(*p))
3720 return PyBool_FromLong(1);
3721
3722 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003723 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003724 return PyBool_FromLong(0);
3725
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003726 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003727 for (; p < e; p++) {
3728 if (!isdigit(*p))
3729 return PyBool_FromLong(0);
3730 }
3731 return PyBool_FromLong(1);
3732}
3733
3734
3735PyDoc_STRVAR(islower__doc__,
3736"S.islower() -> bool\n\
3737\n\
3738Return True if all cased characters in S are lowercase and there is\n\
3739at least one cased character in S, False otherwise.");
3740
3741static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003742string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003743{
3744 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003745 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003746 register const unsigned char *e;
3747 int cased;
3748
3749 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003750 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003751 return PyBool_FromLong(islower(*p) != 0);
3752
3753 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003754 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003755 return PyBool_FromLong(0);
3756
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003757 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003758 cased = 0;
3759 for (; p < e; p++) {
3760 if (isupper(*p))
3761 return PyBool_FromLong(0);
3762 else if (!cased && islower(*p))
3763 cased = 1;
3764 }
3765 return PyBool_FromLong(cased);
3766}
3767
3768
3769PyDoc_STRVAR(isupper__doc__,
3770"S.isupper() -> bool\n\
3771\n\
3772Return True if all cased characters in S are uppercase and there is\n\
3773at least one cased character in S, False otherwise.");
3774
3775static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003776string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003777{
3778 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003779 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003780 register const unsigned char *e;
3781 int cased;
3782
3783 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003784 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003785 return PyBool_FromLong(isupper(*p) != 0);
3786
3787 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003788 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003789 return PyBool_FromLong(0);
3790
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003791 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003792 cased = 0;
3793 for (; p < e; p++) {
3794 if (islower(*p))
3795 return PyBool_FromLong(0);
3796 else if (!cased && isupper(*p))
3797 cased = 1;
3798 }
3799 return PyBool_FromLong(cased);
3800}
3801
3802
3803PyDoc_STRVAR(istitle__doc__,
3804"S.istitle() -> bool\n\
3805\n\
3806Return True if S is a titlecased string and there is at least one\n\
3807character in S, i.e. uppercase characters may only follow uncased\n\
3808characters and lowercase characters only cased ones. Return False\n\
3809otherwise.");
3810
3811static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003812string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003813{
3814 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003815 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003816 register const unsigned char *e;
3817 int cased, previous_is_cased;
3818
3819 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003820 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003821 return PyBool_FromLong(isupper(*p) != 0);
3822
3823 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003824 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003825 return PyBool_FromLong(0);
3826
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003827 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003828 cased = 0;
3829 previous_is_cased = 0;
3830 for (; p < e; p++) {
3831 register const unsigned char ch = *p;
3832
3833 if (isupper(ch)) {
3834 if (previous_is_cased)
3835 return PyBool_FromLong(0);
3836 previous_is_cased = 1;
3837 cased = 1;
3838 }
3839 else if (islower(ch)) {
3840 if (!previous_is_cased)
3841 return PyBool_FromLong(0);
3842 previous_is_cased = 1;
3843 cased = 1;
3844 }
3845 else
3846 previous_is_cased = 0;
3847 }
3848 return PyBool_FromLong(cased);
3849}
3850
3851
3852PyDoc_STRVAR(splitlines__doc__,
3853"S.splitlines([keepends]) -> list of strings\n\
3854\n\
3855Return a list of the lines in S, breaking at line boundaries.\n\
3856Line breaks are not included in the resulting list unless keepends\n\
3857is given and true.");
3858
3859static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003860string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003861{
3862 register Py_ssize_t i;
3863 register Py_ssize_t j;
3864 Py_ssize_t len;
3865 int keepends = 0;
3866 PyObject *list;
3867 PyObject *str;
3868 char *data;
3869
3870 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3871 return NULL;
3872
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003873 data = PyString_AS_STRING(self);
3874 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003875
3876 /* This does not use the preallocated list because splitlines is
3877 usually run with hundreds of newlines. The overhead of
3878 switching between PyList_SET_ITEM and append causes about a
3879 2-3% slowdown for that common case. A smarter implementation
3880 could move the if check out, so the SET_ITEMs are done first
3881 and the appends only done when the prealloc buffer is full.
3882 That's too much work for little gain.*/
3883
3884 list = PyList_New(0);
3885 if (!list)
3886 goto onError;
3887
3888 for (i = j = 0; i < len; ) {
3889 Py_ssize_t eol;
3890
3891 /* Find a line and append it */
3892 while (i < len && data[i] != '\n' && data[i] != '\r')
3893 i++;
3894
3895 /* Skip the line break reading CRLF as one line break */
3896 eol = i;
3897 if (i < len) {
3898 if (data[i] == '\r' && i + 1 < len &&
3899 data[i+1] == '\n')
3900 i += 2;
3901 else
3902 i++;
3903 if (keepends)
3904 eol = i;
3905 }
3906 SPLIT_APPEND(data, j, eol);
3907 j = i;
3908 }
3909 if (j < len) {
3910 SPLIT_APPEND(data, j, len);
3911 }
3912
3913 return list;
3914
3915 onError:
3916 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003917 return NULL;
3918}
3919
Robert Schuppenies51df0642008-06-01 16:16:17 +00003920PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003921"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003922
3923static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003924string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003925{
3926 Py_ssize_t res;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003927 res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00003928 return PyInt_FromSsize_t(res);
3929}
3930
Christian Heimes44720832008-05-26 13:01:01 +00003931#undef SPLIT_APPEND
3932#undef SPLIT_ADD
3933#undef MAX_PREALLOC
3934#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003935
3936static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003937string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003938{
Christian Heimes44720832008-05-26 13:01:01 +00003939 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003940}
3941
Christian Heimes1a6387e2008-03-26 12:49:49 +00003942
Christian Heimes44720832008-05-26 13:01:01 +00003943#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003944
Christian Heimes44720832008-05-26 13:01:01 +00003945PyDoc_STRVAR(format__doc__,
3946"S.format(*args, **kwargs) -> unicode\n\
3947\n\
3948");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003949
Eric Smithdc13b792008-05-30 18:10:04 +00003950static PyObject *
3951string__format__(PyObject* self, PyObject* args)
3952{
3953 PyObject *format_spec;
3954 PyObject *result = NULL;
3955 PyObject *tmp = NULL;
3956
3957 /* If 2.x, convert format_spec to the same type as value */
3958 /* This is to allow things like u''.format('') */
3959 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3960 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003961 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00003962 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3963 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3964 goto done;
3965 }
3966 tmp = PyObject_Str(format_spec);
3967 if (tmp == NULL)
3968 goto done;
3969 format_spec = tmp;
3970
3971 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003972 PyString_AS_STRING(format_spec),
3973 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003974done:
3975 Py_XDECREF(tmp);
3976 return result;
3977}
3978
Christian Heimes44720832008-05-26 13:01:01 +00003979PyDoc_STRVAR(p_format__doc__,
3980"S.__format__(format_spec) -> unicode\n\
3981\n\
3982");
3983
3984
/* Method table for the str type; PyString_Type refers to it through its
   tp_methods slot.  Each entry gives the Python-visible name, the C
   implementation, the calling-convention flags and, for most entries, a
   docstring.  Entries with only three initializers leave the docstring
   field zero-initialized. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    /* New-style formatting support; the helpers come from
       stringlib/string_format.h, included above this table. */
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
4043
4044static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004045str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004046
Christian Heimes44720832008-05-26 13:01:01 +00004047static PyObject *
4048string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4049{
4050 PyObject *x = NULL;
4051 static char *kwlist[] = {"object", 0};
4052
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004053 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00004054 return str_subtype_new(type, args, kwds);
4055 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4056 return NULL;
4057 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004058 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00004059 return PyObject_Str(x);
4060}
4061
4062static PyObject *
4063str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4064{
4065 PyObject *tmp, *pnew;
4066 Py_ssize_t n;
4067
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004068 assert(PyType_IsSubtype(type, &PyString_Type));
4069 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00004070 if (tmp == NULL)
4071 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004072 assert(PyString_CheckExact(tmp));
4073 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00004074 pnew = type->tp_alloc(type, n);
4075 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004076 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4077 ((PyStringObject *)pnew)->ob_shash =
4078 ((PyStringObject *)tmp)->ob_shash;
4079 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00004080 }
4081 Py_DECREF(tmp);
4082 return pnew;
4083}
4084
4085static PyObject *
4086basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4087{
4088 PyErr_SetString(PyExc_TypeError,
4089 "The basestring type cannot be instantiated");
4090 return NULL;
4091}
4092
4093static PyObject *
4094string_mod(PyObject *v, PyObject *w)
4095{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004096 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004097 Py_INCREF(Py_NotImplemented);
4098 return Py_NotImplemented;
4099 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004100 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004101}
4102
/* Docstring for the basestring type (used as tp_doc of PyBaseString_Type). */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");

/* Number protocol table for str.  Only the remainder slot is filled in,
   with string_mod; every slot after nb_remainder is left out of the
   initializer and is therefore zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
4113
4114
/* Type object for basestring.  Apart from the name, flags, docstring and
   base type, the only slot set is tp_new, which points at
   basestring_new (a function that always raises TypeError). */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
4156
/* Docstring for the str type (used as tp_doc of PyString_Type). */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  It derives from PyBaseString_Type, is a
   variable-size type with one char per item, and wires in the number,
   sequence, mapping and buffer tables plus the string_methods table. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
4206
4207void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004208PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004209{
4210 register PyObject *v;
4211 if (*pv == NULL)
4212 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004213 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00004214 Py_DECREF(*pv);
4215 *pv = NULL;
4216 return;
4217 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004218 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004219 Py_DECREF(*pv);
4220 *pv = v;
4221}
4222
4223void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004224PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004225{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004226 PyString_Concat(pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004227 Py_XDECREF(w);
4228}
4229
4230
4231/* The following function breaks the notion that strings are immutable:
4232 it changes the size of a string. We get away with this only if there
4233 is only one module referencing the object. You can also think of it
4234 as creating a new string object and destroying the old one, only
4235 more efficiently. In any case, don't use this if the string may
4236 already be known to some other part of the code...
4237 Note that if there's not enough memory to resize the string, the original
4238 string object at *pv is deallocated, *pv is set to NULL, an "out of
4239 memory" exception is set, and -1 is returned. Else (on success) 0 is
4240 returned, and the value in *pv may or may not be the same as on input.
4241 As always, an extra byte is allocated for a trailing \0 byte (newsize
4242 does *not* include that), and a trailing \0 byte is stored.
4243*/
4244
4245int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004246_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00004247{
4248 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004249 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00004250 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004251 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4252 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004253 *pv = 0;
4254 Py_DECREF(v);
4255 PyErr_BadInternalCall();
4256 return -1;
4257 }
4258 /* XXX UNREF/NEWREF interface should be more symmetrical */
4259 _Py_DEC_REFTOTAL;
4260 _Py_ForgetReference(v);
4261 *pv = (PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004262 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00004263 if (*pv == NULL) {
4264 PyObject_Del(v);
4265 PyErr_NoMemory();
4266 return -1;
4267 }
4268 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004269 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00004270 Py_SIZE(sv) = newsize;
4271 sv->ob_sval[newsize] = '\0';
4272 sv->ob_shash = -1; /* invalidate cached hash value */
4273 return 0;
4274}
4275
4276/* Helpers for formatstring */
4277
4278Py_LOCAL_INLINE(PyObject *)
4279getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4280{
4281 Py_ssize_t argidx = *p_argidx;
4282 if (argidx < arglen) {
4283 (*p_argidx)++;
4284 if (arglen < 0)
4285 return args;
4286 else
4287 return PyTuple_GetItem(args, argidx);
4288 }
4289 PyErr_SetString(PyExc_TypeError,
4290 "not enough arguments for format string");
4291 return NULL;
4292}
4293
4294/* Format codes
4295 * F_LJUST '-'
4296 * F_SIGN '+'
4297 * F_BLANK ' '
4298 * F_ALT '#'
4299 * F_ZERO '0'
4300 */
4301#define F_LJUST (1<<0)
4302#define F_SIGN (1<<1)
4303#define F_BLANK (1<<2)
4304#define F_ALT (1<<3)
4305#define F_ZERO (1<<4)
4306
4307Py_LOCAL_INLINE(int)
4308formatfloat(char *buf, size_t buflen, int flags,
4309 int prec, int type, PyObject *v)
4310{
4311 /* fmt = '%#.' + `prec` + `type`
4312 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4313 char fmt[20];
4314 double x;
4315 x = PyFloat_AsDouble(v);
4316 if (x == -1.0 && PyErr_Occurred()) {
4317 PyErr_Format(PyExc_TypeError, "float argument required, "
4318 "not %.200s", Py_TYPE(v)->tp_name);
4319 return -1;
4320 }
4321 if (prec < 0)
4322 prec = 6;
Eric Smithd6c393a2008-07-17 19:49:47 +00004323 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4324 type = 'g';
Christian Heimes44720832008-05-26 13:01:01 +00004325 /* Worst case length calc to ensure no buffer overrun:
4326
4327 'g' formats:
4328 fmt = %#.<prec>g
4329 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4330 for any double rep.)
4331 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4332
4333 'f' formats:
4334 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4335 len = 1 + 50 + 1 + prec = 52 + prec
4336
4337 If prec=0 the effective precision is 1 (the leading digit is
4338 always given), therefore increase the length by one.
4339
4340 */
4341 if (((type == 'g' || type == 'G') &&
4342 buflen <= (size_t)10 + (size_t)prec) ||
Eric Smithd6c393a2008-07-17 19:49:47 +00004343 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Christian Heimes44720832008-05-26 13:01:01 +00004344 PyErr_SetString(PyExc_OverflowError,
4345 "formatted float is too long (precision too large?)");
4346 return -1;
4347 }
4348 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4349 (flags&F_ALT) ? "#" : "",
4350 prec, type);
4351 PyOS_ascii_formatd(buf, buflen, fmt, x);
4352 return (int)strlen(buf);
4353}
4354
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004355/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004356 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4357 * Python's regular ints.
4358 * Return value: a new PyString*, or NULL if error.
4359 * . *pbuf is set to point into it,
4360 * *plen set to the # of chars following that.
4361 * Caller must decref it when done using pbuf.
4362 * The string starting at *pbuf is of the form
4363 * "-"? ("0x" | "0X")? digit+
4364 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4365 * set in flags. The case of hex digits will be correct,
4366 * There will be at least prec digits, zero-filled on the left if
4367 * necessary to get that many.
4368 * val object to be converted
4369 * flags bitmask of format flags; only F_ALT is looked at
4370 * prec minimum number of digits; 0-fill on left if needed
4371 * type a character in [duoxX]; u acts the same as d
4372 *
4373 * CAUTION: o, x and X conversions on regular ints can never
4374 * produce a '-' sign, but can for Python's unbounded ints.
4375 */
4376PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004377_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00004378 char **pbuf, int *plen)
4379{
4380 PyObject *result = NULL;
4381 char *buf;
4382 Py_ssize_t i;
4383 int sign; /* 1 if '-', else 0 */
4384 int len; /* number of characters */
4385 Py_ssize_t llen;
4386 int numdigits; /* len == numnondigits + numdigits */
4387 int numnondigits = 0;
4388
4389 switch (type) {
4390 case 'd':
4391 case 'u':
4392 result = Py_TYPE(val)->tp_str(val);
4393 break;
4394 case 'o':
4395 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4396 break;
4397 case 'x':
4398 case 'X':
4399 numnondigits = 2;
4400 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4401 break;
4402 default:
4403 assert(!"'type' not in [duoxX]");
4404 }
4405 if (!result)
4406 return NULL;
4407
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004408 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004409 if (!buf) {
4410 Py_DECREF(result);
4411 return NULL;
4412 }
4413
4414 /* To modify the string in-place, there can only be one reference. */
4415 if (Py_REFCNT(result) != 1) {
4416 PyErr_BadInternalCall();
4417 return NULL;
4418 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004419 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004420 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004421 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004422 return NULL;
4423 }
4424 len = (int)llen;
4425 if (buf[len-1] == 'L') {
4426 --len;
4427 buf[len] = '\0';
4428 }
4429 sign = buf[0] == '-';
4430 numnondigits += sign;
4431 numdigits = len - numnondigits;
4432 assert(numdigits > 0);
4433
4434 /* Get rid of base marker unless F_ALT */
4435 if ((flags & F_ALT) == 0) {
4436 /* Need to skip 0x, 0X or 0. */
4437 int skipped = 0;
4438 switch (type) {
4439 case 'o':
4440 assert(buf[sign] == '0');
4441 /* If 0 is only digit, leave it alone. */
4442 if (numdigits > 1) {
4443 skipped = 1;
4444 --numdigits;
4445 }
4446 break;
4447 case 'x':
4448 case 'X':
4449 assert(buf[sign] == '0');
4450 assert(buf[sign + 1] == 'x');
4451 skipped = 2;
4452 numnondigits -= 2;
4453 break;
4454 }
4455 if (skipped) {
4456 buf += skipped;
4457 len -= skipped;
4458 if (sign)
4459 buf[0] = '-';
4460 }
4461 assert(len == numnondigits + numdigits);
4462 assert(numdigits > 0);
4463 }
4464
4465 /* Fill with leading zeroes to meet minimum width. */
4466 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004467 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004468 numnondigits + prec);
4469 char *b1;
4470 if (!r1) {
4471 Py_DECREF(result);
4472 return NULL;
4473 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004474 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004475 for (i = 0; i < numnondigits; ++i)
4476 *b1++ = *buf++;
4477 for (i = 0; i < prec - numdigits; i++)
4478 *b1++ = '0';
4479 for (i = 0; i < numdigits; i++)
4480 *b1++ = *buf++;
4481 *b1 = '\0';
4482 Py_DECREF(result);
4483 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004484 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004485 len = numnondigits + prec;
4486 }
4487
4488 /* Fix up case for hex conversions. */
4489 if (type == 'X') {
4490 /* Need to convert all lower case letters to upper case.
4491 and need to convert 0x to 0X (and -0x to -0X). */
4492 for (i = 0; i < len; i++)
4493 if (buf[i] >= 'a' && buf[i] <= 'x')
4494 buf[i] -= 'a'-'A';
4495 }
4496 *pbuf = buf;
4497 *plen = len;
4498 return result;
4499}
4500
4501Py_LOCAL_INLINE(int)
4502formatint(char *buf, size_t buflen, int flags,
4503 int prec, int type, PyObject *v)
4504{
4505 /* fmt = '%#.' + `prec` + 'l' + `type`
4506 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4507 + 1 + 1 = 24 */
4508 char fmt[64]; /* plenty big enough! */
4509 char *sign;
4510 long x;
4511
4512 x = PyInt_AsLong(v);
4513 if (x == -1 && PyErr_Occurred()) {
4514 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4515 Py_TYPE(v)->tp_name);
4516 return -1;
4517 }
4518 if (x < 0 && type == 'u') {
4519 type = 'd';
4520 }
4521 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4522 sign = "-";
4523 else
4524 sign = "";
4525 if (prec < 0)
4526 prec = 1;
4527
4528 if ((flags & F_ALT) &&
4529 (type == 'x' || type == 'X')) {
4530 /* When converting under %#x or %#X, there are a number
4531 * of issues that cause pain:
4532 * - when 0 is being converted, the C standard leaves off
4533 * the '0x' or '0X', which is inconsistent with other
4534 * %#x/%#X conversions and inconsistent with Python's
4535 * hex() function
4536 * - there are platforms that violate the standard and
4537 * convert 0 with the '0x' or '0X'
4538 * (Metrowerks, Compaq Tru64)
4539 * - there are platforms that give '0x' when converting
4540 * under %#X, but convert 0 in accordance with the
4541 * standard (OS/2 EMX)
4542 *
4543 * We can achieve the desired consistency by inserting our
4544 * own '0x' or '0X' prefix, and substituting %x/%X in place
4545 * of %#x/%#X.
4546 *
4547 * Note that this is the same approach as used in
4548 * formatint() in unicodeobject.c
4549 */
4550 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4551 sign, type, prec, type);
4552 }
4553 else {
4554 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4555 sign, (flags&F_ALT) ? "#" : "",
4556 prec, type);
4557 }
4558
4559 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4560 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4561 */
4562 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4563 PyErr_SetString(PyExc_OverflowError,
4564 "formatted integer is too long (precision too large?)");
4565 return -1;
4566 }
4567 if (sign[0])
4568 PyOS_snprintf(buf, buflen, fmt, -x);
4569 else
4570 PyOS_snprintf(buf, buflen, fmt, x);
4571 return (int)strlen(buf);
4572}
4573
4574Py_LOCAL_INLINE(int)
4575formatchar(char *buf, size_t buflen, PyObject *v)
4576{
4577 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004578 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004579 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4580 return -1;
4581 }
4582 else {
4583 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4584 return -1;
4585 }
4586 buf[1] = '\0';
4587 return 1;
4588}
4589
4590/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4591
4592 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4593 chars are formatted. XXX This is a magic number. Each formatting
4594 routine does bounds checking to ensure no overflow, but a better
4595 solution may be to malloc a buffer of appropriate size for each
4596 format. For now, the current solution is sufficient.
4597*/
4598#define FORMATBUFLEN (size_t)120
4599
4600PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004601PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004602{
4603 char *fmt, *res;
4604 Py_ssize_t arglen, argidx;
4605 Py_ssize_t reslen, rescnt, fmtcnt;
4606 int args_owned = 0;
4607 PyObject *result, *orig_args;
4608#ifdef Py_USING_UNICODE
4609 PyObject *v, *w;
4610#endif
4611 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004612 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004613 PyErr_BadInternalCall();
4614 return NULL;
4615 }
4616 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004617 fmt = PyString_AS_STRING(format);
4618 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004619 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004620 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004621 if (result == NULL)
4622 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004623 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004624 if (PyTuple_Check(args)) {
4625 arglen = PyTuple_GET_SIZE(args);
4626 argidx = 0;
4627 }
4628 else {
4629 arglen = -1;
4630 argidx = -2;
4631 }
4632 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4633 !PyObject_TypeCheck(args, &PyBaseString_Type))
4634 dict = args;
4635 while (--fmtcnt >= 0) {
4636 if (*fmt != '%') {
4637 if (--rescnt < 0) {
4638 rescnt = fmtcnt + 100;
4639 reslen += rescnt;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004640 if (_PyString_Resize(&result, reslen) < 0)
Christian Heimes44720832008-05-26 13:01:01 +00004641 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004642 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004643 + reslen - rescnt;
4644 --rescnt;
4645 }
4646 *res++ = *fmt++;
4647 }
4648 else {
4649 /* Got a format specifier */
4650 int flags = 0;
4651 Py_ssize_t width = -1;
4652 int prec = -1;
4653 int c = '\0';
4654 int fill;
4655 int isnumok;
4656 PyObject *v = NULL;
4657 PyObject *temp = NULL;
4658 char *pbuf;
4659 int sign;
4660 Py_ssize_t len;
4661 char formatbuf[FORMATBUFLEN];
4662 /* For format{float,int,char}() */
4663#ifdef Py_USING_UNICODE
4664 char *fmt_start = fmt;
4665 Py_ssize_t argidx_start = argidx;
4666#endif
4667
4668 fmt++;
4669 if (*fmt == '(') {
4670 char *keystart;
4671 Py_ssize_t keylen;
4672 PyObject *key;
4673 int pcount = 1;
4674
4675 if (dict == NULL) {
4676 PyErr_SetString(PyExc_TypeError,
4677 "format requires a mapping");
4678 goto error;
4679 }
4680 ++fmt;
4681 --fmtcnt;
4682 keystart = fmt;
4683 /* Skip over balanced parentheses */
4684 while (pcount > 0 && --fmtcnt >= 0) {
4685 if (*fmt == ')')
4686 --pcount;
4687 else if (*fmt == '(')
4688 ++pcount;
4689 fmt++;
4690 }
4691 keylen = fmt - keystart - 1;
4692 if (fmtcnt < 0 || pcount > 0) {
4693 PyErr_SetString(PyExc_ValueError,
4694 "incomplete format key");
4695 goto error;
4696 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004697 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004698 keylen);
4699 if (key == NULL)
4700 goto error;
4701 if (args_owned) {
4702 Py_DECREF(args);
4703 args_owned = 0;
4704 }
4705 args = PyObject_GetItem(dict, key);
4706 Py_DECREF(key);
4707 if (args == NULL) {
4708 goto error;
4709 }
4710 args_owned = 1;
4711 arglen = -1;
4712 argidx = -2;
4713 }
4714 while (--fmtcnt >= 0) {
4715 switch (c = *fmt++) {
4716 case '-': flags |= F_LJUST; continue;
4717 case '+': flags |= F_SIGN; continue;
4718 case ' ': flags |= F_BLANK; continue;
4719 case '#': flags |= F_ALT; continue;
4720 case '0': flags |= F_ZERO; continue;
4721 }
4722 break;
4723 }
4724 if (c == '*') {
4725 v = getnextarg(args, arglen, &argidx);
4726 if (v == NULL)
4727 goto error;
4728 if (!PyInt_Check(v)) {
4729 PyErr_SetString(PyExc_TypeError,
4730 "* wants int");
4731 goto error;
4732 }
4733 width = PyInt_AsLong(v);
4734 if (width < 0) {
4735 flags |= F_LJUST;
4736 width = -width;
4737 }
4738 if (--fmtcnt >= 0)
4739 c = *fmt++;
4740 }
4741 else if (c >= 0 && isdigit(c)) {
4742 width = c - '0';
4743 while (--fmtcnt >= 0) {
4744 c = Py_CHARMASK(*fmt++);
4745 if (!isdigit(c))
4746 break;
4747 if ((width*10) / 10 != width) {
4748 PyErr_SetString(
4749 PyExc_ValueError,
4750 "width too big");
4751 goto error;
4752 }
4753 width = width*10 + (c - '0');
4754 }
4755 }
4756 if (c == '.') {
4757 prec = 0;
4758 if (--fmtcnt >= 0)
4759 c = *fmt++;
4760 if (c == '*') {
4761 v = getnextarg(args, arglen, &argidx);
4762 if (v == NULL)
4763 goto error;
4764 if (!PyInt_Check(v)) {
4765 PyErr_SetString(
4766 PyExc_TypeError,
4767 "* wants int");
4768 goto error;
4769 }
4770 prec = PyInt_AsLong(v);
4771 if (prec < 0)
4772 prec = 0;
4773 if (--fmtcnt >= 0)
4774 c = *fmt++;
4775 }
4776 else if (c >= 0 && isdigit(c)) {
4777 prec = c - '0';
4778 while (--fmtcnt >= 0) {
4779 c = Py_CHARMASK(*fmt++);
4780 if (!isdigit(c))
4781 break;
4782 if ((prec*10) / 10 != prec) {
4783 PyErr_SetString(
4784 PyExc_ValueError,
4785 "prec too big");
4786 goto error;
4787 }
4788 prec = prec*10 + (c - '0');
4789 }
4790 }
4791 } /* prec */
4792 if (fmtcnt >= 0) {
4793 if (c == 'h' || c == 'l' || c == 'L') {
4794 if (--fmtcnt >= 0)
4795 c = *fmt++;
4796 }
4797 }
4798 if (fmtcnt < 0) {
4799 PyErr_SetString(PyExc_ValueError,
4800 "incomplete format");
4801 goto error;
4802 }
4803 if (c != '%') {
4804 v = getnextarg(args, arglen, &argidx);
4805 if (v == NULL)
4806 goto error;
4807 }
4808 sign = 0;
4809 fill = ' ';
4810 switch (c) {
4811 case '%':
4812 pbuf = "%";
4813 len = 1;
4814 break;
4815 case 's':
4816#ifdef Py_USING_UNICODE
4817 if (PyUnicode_Check(v)) {
4818 fmt = fmt_start;
4819 argidx = argidx_start;
4820 goto unicode;
4821 }
4822#endif
4823 temp = _PyObject_Str(v);
4824#ifdef Py_USING_UNICODE
4825 if (temp != NULL && PyUnicode_Check(temp)) {
4826 Py_DECREF(temp);
4827 fmt = fmt_start;
4828 argidx = argidx_start;
4829 goto unicode;
4830 }
4831#endif
4832 /* Fall through */
4833 case 'r':
4834 if (c == 'r')
4835 temp = PyObject_Repr(v);
4836 if (temp == NULL)
4837 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004838 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004839 PyErr_SetString(PyExc_TypeError,
4840 "%s argument has non-string str()");
4841 Py_DECREF(temp);
4842 goto error;
4843 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004844 pbuf = PyString_AS_STRING(temp);
4845 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004846 if (prec >= 0 && len > prec)
4847 len = prec;
4848 break;
4849 case 'i':
4850 case 'd':
4851 case 'u':
4852 case 'o':
4853 case 'x':
4854 case 'X':
4855 if (c == 'i')
4856 c = 'd';
4857 isnumok = 0;
4858 if (PyNumber_Check(v)) {
4859 PyObject *iobj=NULL;
4860
4861 if (PyInt_Check(v) || (PyLong_Check(v))) {
4862 iobj = v;
4863 Py_INCREF(iobj);
4864 }
4865 else {
4866 iobj = PyNumber_Int(v);
4867 if (iobj==NULL) iobj = PyNumber_Long(v);
4868 }
4869 if (iobj!=NULL) {
4870 if (PyInt_Check(iobj)) {
4871 isnumok = 1;
4872 pbuf = formatbuf;
4873 len = formatint(pbuf,
4874 sizeof(formatbuf),
4875 flags, prec, c, iobj);
4876 Py_DECREF(iobj);
4877 if (len < 0)
4878 goto error;
4879 sign = 1;
4880 }
4881 else if (PyLong_Check(iobj)) {
4882 int ilen;
4883
4884 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004885 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004886 prec, c, &pbuf, &ilen);
4887 Py_DECREF(iobj);
4888 len = ilen;
4889 if (!temp)
4890 goto error;
4891 sign = 1;
4892 }
4893 else {
4894 Py_DECREF(iobj);
4895 }
4896 }
4897 }
4898 if (!isnumok) {
4899 PyErr_Format(PyExc_TypeError,
4900 "%%%c format: a number is required, "
4901 "not %.200s", c, Py_TYPE(v)->tp_name);
4902 goto error;
4903 }
4904 if (flags & F_ZERO)
4905 fill = '0';
4906 break;
4907 case 'e':
4908 case 'E':
4909 case 'f':
4910 case 'F':
4911 case 'g':
4912 case 'G':
Eric Smithd6c393a2008-07-17 19:49:47 +00004913 if (c == 'F')
4914 c = 'f';
Christian Heimes44720832008-05-26 13:01:01 +00004915 pbuf = formatbuf;
4916 len = formatfloat(pbuf, sizeof(formatbuf),
4917 flags, prec, c, v);
4918 if (len < 0)
4919 goto error;
4920 sign = 1;
4921 if (flags & F_ZERO)
4922 fill = '0';
4923 break;
4924 case 'c':
4925#ifdef Py_USING_UNICODE
4926 if (PyUnicode_Check(v)) {
4927 fmt = fmt_start;
4928 argidx = argidx_start;
4929 goto unicode;
4930 }
4931#endif
4932 pbuf = formatbuf;
4933 len = formatchar(pbuf, sizeof(formatbuf), v);
4934 if (len < 0)
4935 goto error;
4936 break;
4937 default:
4938 PyErr_Format(PyExc_ValueError,
4939 "unsupported format character '%c' (0x%x) "
4940 "at index %zd",
4941 c, c,
4942 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004943 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00004944 goto error;
4945 }
4946 if (sign) {
4947 if (*pbuf == '-' || *pbuf == '+') {
4948 sign = *pbuf++;
4949 len--;
4950 }
4951 else if (flags & F_SIGN)
4952 sign = '+';
4953 else if (flags & F_BLANK)
4954 sign = ' ';
4955 else
4956 sign = 0;
4957 }
4958 if (width < len)
4959 width = len;
4960 if (rescnt - (sign != 0) < width) {
4961 reslen -= rescnt;
4962 rescnt = width + fmtcnt + 100;
4963 reslen += rescnt;
4964 if (reslen < 0) {
4965 Py_DECREF(result);
4966 Py_XDECREF(temp);
4967 return PyErr_NoMemory();
4968 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004969 if (_PyString_Resize(&result, reslen) < 0) {
Christian Heimes44720832008-05-26 13:01:01 +00004970 Py_XDECREF(temp);
4971 return NULL;
4972 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004973 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004974 + reslen - rescnt;
4975 }
4976 if (sign) {
4977 if (fill != ' ')
4978 *res++ = sign;
4979 rescnt--;
4980 if (width > len)
4981 width--;
4982 }
4983 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4984 assert(pbuf[0] == '0');
4985 assert(pbuf[1] == c);
4986 if (fill != ' ') {
4987 *res++ = *pbuf++;
4988 *res++ = *pbuf++;
4989 }
4990 rescnt -= 2;
4991 width -= 2;
4992 if (width < 0)
4993 width = 0;
4994 len -= 2;
4995 }
4996 if (width > len && !(flags & F_LJUST)) {
4997 do {
4998 --rescnt;
4999 *res++ = fill;
5000 } while (--width > len);
5001 }
5002 if (fill == ' ') {
5003 if (sign)
5004 *res++ = sign;
5005 if ((flags & F_ALT) &&
5006 (c == 'x' || c == 'X')) {
5007 assert(pbuf[0] == '0');
5008 assert(pbuf[1] == c);
5009 *res++ = *pbuf++;
5010 *res++ = *pbuf++;
5011 }
5012 }
5013 Py_MEMCPY(res, pbuf, len);
5014 res += len;
5015 rescnt -= len;
5016 while (--width >= len) {
5017 --rescnt;
5018 *res++ = ' ';
5019 }
5020 if (dict && (argidx < arglen) && c != '%') {
5021 PyErr_SetString(PyExc_TypeError,
5022 "not all arguments converted during string formatting");
5023 Py_XDECREF(temp);
5024 goto error;
5025 }
5026 Py_XDECREF(temp);
5027 } /* '%' */
5028 } /* until end */
5029 if (argidx < arglen && !dict) {
5030 PyErr_SetString(PyExc_TypeError,
5031 "not all arguments converted during string formatting");
5032 goto error;
5033 }
5034 if (args_owned) {
5035 Py_DECREF(args);
5036 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005037 _PyString_Resize(&result, reslen - rescnt);
Christian Heimes44720832008-05-26 13:01:01 +00005038 return result;
5039
5040#ifdef Py_USING_UNICODE
5041 unicode:
5042 if (args_owned) {
5043 Py_DECREF(args);
5044 args_owned = 0;
5045 }
5046 /* Fiddle args right (remove the first argidx arguments) */
5047 if (PyTuple_Check(orig_args) && argidx > 0) {
5048 PyObject *v;
5049 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5050 v = PyTuple_New(n);
5051 if (v == NULL)
5052 goto error;
5053 while (--n >= 0) {
5054 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5055 Py_INCREF(w);
5056 PyTuple_SET_ITEM(v, n, w);
5057 }
5058 args = v;
5059 } else {
5060 Py_INCREF(orig_args);
5061 args = orig_args;
5062 }
5063 args_owned = 1;
5064 /* Take what we have of the result and let the Unicode formatting
5065 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005066 rescnt = res - PyString_AS_STRING(result);
5067 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00005068 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005069 fmtcnt = PyString_GET_SIZE(format) - \
5070 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00005071 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5072 if (format == NULL)
5073 goto error;
5074 v = PyUnicode_Format(format, args);
5075 Py_DECREF(format);
5076 if (v == NULL)
5077 goto error;
5078 /* Paste what we have (result) to what the Unicode formatting
5079 function returned (v) and return the result (or error) */
5080 w = PyUnicode_Concat(result, v);
5081 Py_DECREF(result);
5082 Py_DECREF(v);
5083 Py_DECREF(args);
5084 return w;
5085#endif /* Py_USING_UNICODE */
5086
5087 error:
5088 Py_DECREF(result);
5089 if (args_owned) {
5090 Py_DECREF(args);
5091 }
5092 return NULL;
5093}
5094
5095void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005096PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005097{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005098 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00005099 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005100 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005101 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00005102 /* If it's a string subclass, we don't really know what putting
5103 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005104 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00005105 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005106 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00005107 return;
5108 if (interned == NULL) {
5109 interned = PyDict_New();
5110 if (interned == NULL) {
5111 PyErr_Clear(); /* Don't leave an exception */
5112 return;
5113 }
5114 }
5115 t = PyDict_GetItem(interned, (PyObject *)s);
5116 if (t) {
5117 Py_INCREF(t);
5118 Py_DECREF(*p);
5119 *p = t;
5120 return;
5121 }
5122
5123 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5124 PyErr_Clear();
5125 return;
5126 }
5127 /* The two references in interned are not counted by refcnt.
5128 The string deallocator will take care of this */
5129 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005130 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005131}
5132
5133void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005134PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005135{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005136 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005137 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5138 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005139 Py_INCREF(*p);
5140 }
5141}
5142
5143
5144PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005145PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005146{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005147 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00005148 if (s == NULL)
5149 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005150 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00005151 return s;
5152}
5153
5154void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005155PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005156{
5157 int i;
5158 for (i = 0; i < UCHAR_MAX + 1; i++) {
5159 Py_XDECREF(characters[i]);
5160 characters[i] = NULL;
5161 }
5162 Py_XDECREF(nullstring);
5163 nullstring = NULL;
5164}
5165
5166void _Py_ReleaseInternedStrings(void)
5167{
5168 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005169 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00005170 Py_ssize_t i, n;
5171 Py_ssize_t immortal_size = 0, mortal_size = 0;
5172
5173 if (interned == NULL || !PyDict_Check(interned))
5174 return;
5175 keys = PyDict_Keys(interned);
5176 if (keys == NULL || !PyList_Check(keys)) {
5177 PyErr_Clear();
5178 return;
5179 }
5180
5181 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5182 detector, interned strings are not forcibly deallocated; rather, we
5183 give them their stolen references back, and then clear and DECREF
5184 the interned dict. */
5185
5186 n = PyList_GET_SIZE(keys);
5187 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5188 n);
5189 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005190 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00005191 switch (s->ob_sstate) {
5192 case SSTATE_NOT_INTERNED:
5193 /* XXX Shouldn't happen */
5194 break;
5195 case SSTATE_INTERNED_IMMORTAL:
5196 Py_REFCNT(s) += 1;
5197 immortal_size += Py_SIZE(s);
5198 break;
5199 case SSTATE_INTERNED_MORTAL:
5200 Py_REFCNT(s) += 2;
5201 mortal_size += Py_SIZE(s);
5202 break;
5203 default:
5204 Py_FatalError("Inconsistent interned string state.");
5205 }
5206 s->ob_sstate = SSTATE_NOT_INTERNED;
5207 }
5208 fprintf(stderr, "total size of all interned strings: "
5209 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5210 "mortal/immortal\n", mortal_size, immortal_size);
5211 Py_DECREF(keys);
5212 PyDict_Clear(interned);
5213 Py_DECREF(interned);
5214 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005215}