blob: b161c8cc696d694c60033f93794176997c7ca141 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
7
8#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000012static PyStringObject *characters[UCHAR_MAX + 1];
13static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000014
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
23static PyObject *interned;
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
51PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000053{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000054 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000055 if (size < 0) {
56 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000058 return NULL;
59 }
60 if (size == 0 && (op = nullstring) != NULL) {
61#ifdef COUNT_ALLOCS
62 null_strings++;
63#endif
64 Py_INCREF(op);
65 return (PyObject *)op;
66 }
67 if (size == 1 && str != NULL &&
68 (op = characters[*str & UCHAR_MAX]) != NULL)
69 {
70#ifdef COUNT_ALLOCS
71 one_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76
77 /* Inline PyObject_NewVar */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000078 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Christian Heimes44720832008-05-26 13:01:01 +000079 if (op == NULL)
80 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000082 op->ob_shash = -1;
83 op->ob_sstate = SSTATE_NOT_INTERNED;
84 if (str != NULL)
85 Py_MEMCPY(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
87 /* share short strings */
88 if (size == 0) {
89 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +000090 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000091 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +000092 nullstring = op;
93 Py_INCREF(op);
94 } else if (size == 1 && str != NULL) {
95 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +000096 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000097 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +000098 characters[*str & UCHAR_MAX] = op;
99 Py_INCREF(op);
100 }
101 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000102}
103
Christian Heimes44720832008-05-26 13:01:01 +0000104PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000105PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000106{
Christian Heimes44720832008-05-26 13:01:01 +0000107 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000108 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000109
110 assert(str != NULL);
111 size = strlen(str);
112 if (size > PY_SSIZE_T_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
121 Py_INCREF(op);
122 return (PyObject *)op;
123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
128 Py_INCREF(op);
129 return (PyObject *)op;
130 }
131
132 /* Inline PyObject_NewVar */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000133 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Christian Heimes44720832008-05-26 13:01:01 +0000134 if (op == NULL)
135 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000136 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000137 op->ob_shash = -1;
138 op->ob_sstate = SSTATE_NOT_INTERNED;
139 Py_MEMCPY(op->ob_sval, str, size+1);
140 /* share short strings */
141 if (size == 0) {
142 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000143 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000144 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000145 nullstring = op;
146 Py_INCREF(op);
147 } else if (size == 1) {
148 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000149 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000150 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000151 characters[*str & UCHAR_MAX] = op;
152 Py_INCREF(op);
153 }
154 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000155}
156
Christian Heimes44720832008-05-26 13:01:01 +0000157PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000158PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000159{
Christian Heimes44720832008-05-26 13:01:01 +0000160 va_list count;
161 Py_ssize_t n = 0;
162 const char* f;
163 char *s;
164 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165
Christian Heimes44720832008-05-26 13:01:01 +0000166#ifdef VA_LIST_IS_ARRAY
167 Py_MEMCPY(count, vargs, sizeof(va_list));
168#else
169#ifdef __va_copy
170 __va_copy(count, vargs);
171#else
172 count = vargs;
173#endif
174#endif
175 /* step 1: figure out how large a buffer we need */
176 for (f = format; *f; f++) {
177 if (*f == '%') {
178 const char* p = f;
179 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
180 ;
181
182 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
183 * they don't affect the amount of space we reserve.
184 */
185 if ((*f == 'l' || *f == 'z') &&
186 (f[1] == 'd' || f[1] == 'u'))
187 ++f;
188
189 switch (*f) {
190 case 'c':
191 (void)va_arg(count, int);
192 /* fall through... */
193 case '%':
194 n++;
195 break;
196 case 'd': case 'u': case 'i': case 'x':
197 (void) va_arg(count, int);
198 /* 20 bytes is enough to hold a 64-bit
199 integer. Decimal takes the most space.
200 This isn't enough for octal. */
201 n += 20;
202 break;
203 case 's':
204 s = va_arg(count, char*);
205 n += strlen(s);
206 break;
207 case 'p':
208 (void) va_arg(count, int);
209 /* maximum 64-bit pointer representation:
210 * 0xffffffffffffffff
211 * so 19 characters is enough.
212 * XXX I count 18 -- what's the extra for?
213 */
214 n += 19;
215 break;
216 default:
217 /* if we stumble upon an unknown
218 formatting code, copy the rest of
219 the format string to the output
220 string. (we cannot just skip the
221 code, since there's no way to know
222 what's in the argument list) */
223 n += strlen(p);
224 goto expand;
225 }
226 } else
227 n++;
228 }
229 expand:
230 /* step 2: fill the buffer */
231 /* Since we've analyzed how much space we need for the worst case,
232 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000233 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000234 if (!string)
235 return NULL;
236
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000237 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000238
239 for (f = format; *f; f++) {
240 if (*f == '%') {
241 const char* p = f++;
242 Py_ssize_t i;
243 int longflag = 0;
244 int size_tflag = 0;
245 /* parse the width.precision part (we're only
246 interested in the precision value, if any) */
247 n = 0;
248 while (isdigit(Py_CHARMASK(*f)))
249 n = (n*10) + *f++ - '0';
250 if (*f == '.') {
251 f++;
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 }
256 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
257 f++;
258 /* handle the long flag, but only for %ld and %lu.
259 others can be added when necessary. */
260 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
261 longflag = 1;
262 ++f;
263 }
264 /* handle the size_t flag. */
265 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
266 size_tflag = 1;
267 ++f;
268 }
269
270 switch (*f) {
271 case 'c':
272 *s++ = va_arg(vargs, int);
273 break;
274 case 'd':
275 if (longflag)
276 sprintf(s, "%ld", va_arg(vargs, long));
277 else if (size_tflag)
278 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
279 va_arg(vargs, Py_ssize_t));
280 else
281 sprintf(s, "%d", va_arg(vargs, int));
282 s += strlen(s);
283 break;
284 case 'u':
285 if (longflag)
286 sprintf(s, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(s, "%u",
293 va_arg(vargs, unsigned int));
294 s += strlen(s);
295 break;
296 case 'i':
297 sprintf(s, "%i", va_arg(vargs, int));
298 s += strlen(s);
299 break;
300 case 'x':
301 sprintf(s, "%x", va_arg(vargs, int));
302 s += strlen(s);
303 break;
304 case 's':
305 p = va_arg(vargs, char*);
306 i = strlen(p);
307 if (n > 0 && i > n)
308 i = n;
309 Py_MEMCPY(s, p, i);
310 s += i;
311 break;
312 case 'p':
313 sprintf(s, "%p", va_arg(vargs, void*));
314 /* %p is ill-defined: ensure leading 0x. */
315 if (s[1] == 'X')
316 s[1] = 'x';
317 else if (s[1] != 'x') {
318 memmove(s+2, s, strlen(s)+1);
319 s[0] = '0';
320 s[1] = 'x';
321 }
322 s += strlen(s);
323 break;
324 case '%':
325 *s++ = '%';
326 break;
327 default:
328 strcpy(s, p);
329 s += strlen(s);
330 goto end;
331 }
332 } else
333 *s++ = *f;
334 }
335
336 end:
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000337 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Christian Heimes44720832008-05-26 13:01:01 +0000338 return string;
339}
340
341PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000342PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000343{
344 PyObject* ret;
345 va_list vargs;
346
347#ifdef HAVE_STDARG_PROTOTYPES
348 va_start(vargs, format);
349#else
350 va_start(vargs);
351#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000352 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000353 va_end(vargs);
354 return ret;
355}
356
357
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000358PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000359 Py_ssize_t size,
360 const char *encoding,
361 const char *errors)
362{
363 PyObject *v, *str;
364
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000365 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000366 if (str == NULL)
367 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000368 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000369 Py_DECREF(str);
370 return v;
371}
372
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000373PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000374 const char *encoding,
375 const char *errors)
376{
377 PyObject *v;
378
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000379 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000380 PyErr_BadArgument();
381 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000382 }
383
Christian Heimes44720832008-05-26 13:01:01 +0000384 if (encoding == NULL) {
385#ifdef Py_USING_UNICODE
386 encoding = PyUnicode_GetDefaultEncoding();
387#else
388 PyErr_SetString(PyExc_ValueError, "no encoding specified");
389 goto onError;
390#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 /* Decode via the codec registry */
394 v = PyCodec_Decode(str, encoding, errors);
395 if (v == NULL)
396 goto onError;
397
398 return v;
399
400 onError:
401 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000402}
403
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000404PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000405 const char *encoding,
406 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000407{
Christian Heimes44720832008-05-26 13:01:01 +0000408 PyObject *v;
409
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000410 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000411 if (v == NULL)
412 goto onError;
413
414#ifdef Py_USING_UNICODE
415 /* Convert Unicode to a string using the default encoding */
416 if (PyUnicode_Check(v)) {
417 PyObject *temp = v;
418 v = PyUnicode_AsEncodedString(v, NULL, NULL);
419 Py_DECREF(temp);
420 if (v == NULL)
421 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000422 }
Christian Heimes44720832008-05-26 13:01:01 +0000423#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000424 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000425 PyErr_Format(PyExc_TypeError,
426 "decoder did not return a string object (type=%.400s)",
427 Py_TYPE(v)->tp_name);
428 Py_DECREF(v);
429 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000430 }
Christian Heimes44720832008-05-26 13:01:01 +0000431
432 return v;
433
434 onError:
435 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000436}
437
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000438PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000439 Py_ssize_t size,
440 const char *encoding,
441 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000442{
Christian Heimes44720832008-05-26 13:01:01 +0000443 PyObject *v, *str;
444
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000445 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000446 if (str == NULL)
447 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000448 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000449 Py_DECREF(str);
450 return v;
451}
452
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000453PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000454 const char *encoding,
455 const char *errors)
456{
457 PyObject *v;
458
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000459 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000460 PyErr_BadArgument();
461 goto onError;
462 }
463
464 if (encoding == NULL) {
465#ifdef Py_USING_UNICODE
466 encoding = PyUnicode_GetDefaultEncoding();
467#else
468 PyErr_SetString(PyExc_ValueError, "no encoding specified");
469 goto onError;
470#endif
471 }
472
473 /* Encode via the codec registry */
474 v = PyCodec_Encode(str, encoding, errors);
475 if (v == NULL)
476 goto onError;
477
478 return v;
479
480 onError:
481 return NULL;
482}
483
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000484PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000485 const char *encoding,
486 const char *errors)
487{
488 PyObject *v;
489
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000490 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000491 if (v == NULL)
492 goto onError;
493
494#ifdef Py_USING_UNICODE
495 /* Convert Unicode to a string using the default encoding */
496 if (PyUnicode_Check(v)) {
497 PyObject *temp = v;
498 v = PyUnicode_AsEncodedString(v, NULL, NULL);
499 Py_DECREF(temp);
500 if (v == NULL)
501 goto onError;
502 }
503#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000504 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000505 PyErr_Format(PyExc_TypeError,
506 "encoder did not return a string object (type=%.400s)",
507 Py_TYPE(v)->tp_name);
508 Py_DECREF(v);
509 goto onError;
510 }
511
512 return v;
513
514 onError:
515 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000516}
517
518static void
Christian Heimes44720832008-05-26 13:01:01 +0000519string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000520{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000521 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000522 case SSTATE_NOT_INTERNED:
523 break;
524
525 case SSTATE_INTERNED_MORTAL:
526 /* revive dead object temporarily for DelItem */
527 Py_REFCNT(op) = 3;
528 if (PyDict_DelItem(interned, op) != 0)
529 Py_FatalError(
530 "deletion of interned string failed");
531 break;
532
533 case SSTATE_INTERNED_IMMORTAL:
534 Py_FatalError("Immortal interned string died.");
535
536 default:
537 Py_FatalError("Inconsistent interned string state.");
538 }
539 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000540}
541
Christian Heimes44720832008-05-26 13:01:01 +0000542/* Unescape a backslash-escaped string. If unicode is non-zero,
543 the string is a u-literal. If recode_encoding is non-zero,
544 the string is UTF-8 encoded and should be re-encoded in the
545 specified encoding. */
546
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000547PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000548 Py_ssize_t len,
549 const char *errors,
550 Py_ssize_t unicode,
551 const char *recode_encoding)
552{
553 int c;
554 char *p, *buf;
555 const char *end;
556 PyObject *v;
557 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000558 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000559 if (v == NULL)
560 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000561 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000562 end = s + len;
563 while (s < end) {
564 if (*s != '\\') {
565 non_esc:
566#ifdef Py_USING_UNICODE
567 if (recode_encoding && (*s & 0x80)) {
568 PyObject *u, *w;
569 char *r;
570 const char* t;
571 Py_ssize_t rn;
572 t = s;
573 /* Decode non-ASCII bytes as UTF-8. */
574 while (t < end && (*t & 0x80)) t++;
575 u = PyUnicode_DecodeUTF8(s, t - s, errors);
576 if(!u) goto failed;
577
578 /* Recode them in target encoding. */
579 w = PyUnicode_AsEncodedString(
580 u, recode_encoding, errors);
581 Py_DECREF(u);
582 if (!w) goto failed;
583
584 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000585 assert(PyString_Check(w));
586 r = PyString_AS_STRING(w);
587 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000588 Py_MEMCPY(p, r, rn);
589 p += rn;
590 Py_DECREF(w);
591 s = t;
592 } else {
593 *p++ = *s++;
594 }
595#else
596 *p++ = *s++;
597#endif
598 continue;
599 }
600 s++;
601 if (s==end) {
602 PyErr_SetString(PyExc_ValueError,
603 "Trailing \\ in string");
604 goto failed;
605 }
606 switch (*s++) {
607 /* XXX This assumes ASCII! */
608 case '\n': break;
609 case '\\': *p++ = '\\'; break;
610 case '\'': *p++ = '\''; break;
611 case '\"': *p++ = '\"'; break;
612 case 'b': *p++ = '\b'; break;
613 case 'f': *p++ = '\014'; break; /* FF */
614 case 't': *p++ = '\t'; break;
615 case 'n': *p++ = '\n'; break;
616 case 'r': *p++ = '\r'; break;
617 case 'v': *p++ = '\013'; break; /* VT */
618 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
619 case '0': case '1': case '2': case '3':
620 case '4': case '5': case '6': case '7':
621 c = s[-1] - '0';
622 if (s < end && '0' <= *s && *s <= '7') {
623 c = (c<<3) + *s++ - '0';
624 if (s < end && '0' <= *s && *s <= '7')
625 c = (c<<3) + *s++ - '0';
626 }
627 *p++ = c;
628 break;
629 case 'x':
630 if (s+1 < end &&
631 isxdigit(Py_CHARMASK(s[0])) &&
632 isxdigit(Py_CHARMASK(s[1])))
633 {
634 unsigned int x = 0;
635 c = Py_CHARMASK(*s);
636 s++;
637 if (isdigit(c))
638 x = c - '0';
639 else if (islower(c))
640 x = 10 + c - 'a';
641 else
642 x = 10 + c - 'A';
643 x = x << 4;
644 c = Py_CHARMASK(*s);
645 s++;
646 if (isdigit(c))
647 x += c - '0';
648 else if (islower(c))
649 x += 10 + c - 'a';
650 else
651 x += 10 + c - 'A';
652 *p++ = x;
653 break;
654 }
655 if (!errors || strcmp(errors, "strict") == 0) {
656 PyErr_SetString(PyExc_ValueError,
657 "invalid \\x escape");
658 goto failed;
659 }
660 if (strcmp(errors, "replace") == 0) {
661 *p++ = '?';
662 } else if (strcmp(errors, "ignore") == 0)
663 /* do nothing */;
664 else {
665 PyErr_Format(PyExc_ValueError,
666 "decoding error; "
667 "unknown error handling code: %.400s",
668 errors);
669 goto failed;
670 }
671#ifndef Py_USING_UNICODE
672 case 'u':
673 case 'U':
674 case 'N':
675 if (unicode) {
676 PyErr_SetString(PyExc_ValueError,
677 "Unicode escapes not legal "
678 "when Unicode disabled");
679 goto failed;
680 }
681#endif
682 default:
683 *p++ = '\\';
684 s--;
685 goto non_esc; /* an arbitry number of unescaped
686 UTF-8 bytes may follow. */
687 }
688 }
689 if (p-buf < newlen)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000690 _PyString_Resize(&v, p - buf);
Christian Heimes44720832008-05-26 13:01:01 +0000691 return v;
692 failed:
693 Py_DECREF(v);
694 return NULL;
695}
696
697/* -------------------------------------------------------------------- */
698/* object api */
699
Christian Heimes1a6387e2008-03-26 12:49:49 +0000700static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000701string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000702{
Christian Heimes44720832008-05-26 13:01:01 +0000703 char *s;
704 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000705 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000706 return -1;
707 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000708}
709
Christian Heimes44720832008-05-26 13:01:01 +0000710static /*const*/ char *
711string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000712{
Christian Heimes44720832008-05-26 13:01:01 +0000713 char *s;
714 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000715 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000716 return NULL;
717 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000718}
719
720Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000721PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000722{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000723 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000724 return string_getsize(op);
725 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000726}
727
Christian Heimes44720832008-05-26 13:01:01 +0000728/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000729PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000730{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000731 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000732 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000733 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000734}
735
736int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000737PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000738 register char **s,
739 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000740{
Christian Heimes44720832008-05-26 13:01:01 +0000741 if (s == NULL) {
742 PyErr_BadInternalCall();
743 return -1;
744 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000745
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000746 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000747#ifdef Py_USING_UNICODE
748 if (PyUnicode_Check(obj)) {
749 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
750 if (obj == NULL)
751 return -1;
752 }
753 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000754#endif
Christian Heimes44720832008-05-26 13:01:01 +0000755 {
756 PyErr_Format(PyExc_TypeError,
757 "expected string or Unicode object, "
758 "%.200s found", Py_TYPE(obj)->tp_name);
759 return -1;
760 }
761 }
762
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000763 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000764 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000765 *len = PyString_GET_SIZE(obj);
766 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000767 PyErr_SetString(PyExc_TypeError,
768 "expected string without null bytes");
769 return -1;
770 }
771 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000772}
773
Christian Heimes1a6387e2008-03-26 12:49:49 +0000774/* -------------------------------------------------------------------- */
775/* Methods */
776
Christian Heimes44720832008-05-26 13:01:01 +0000777#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000778#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000779
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780#include "stringlib/count.h"
781#include "stringlib/find.h"
782#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000784#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000785#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000786
Christian Heimes1a6387e2008-03-26 12:49:49 +0000787
788
789static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000790string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791{
Christian Heimes44720832008-05-26 13:01:01 +0000792 Py_ssize_t i, str_len;
793 char c;
794 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000795
Christian Heimes44720832008-05-26 13:01:01 +0000796 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000797 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000798 int ret;
799 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000800 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000801 if (op == NULL)
802 return -1;
803 ret = string_print(op, fp, flags);
804 Py_DECREF(op);
805 return ret;
806 }
807 if (flags & Py_PRINT_RAW) {
808 char *data = op->ob_sval;
809 Py_ssize_t size = Py_SIZE(op);
810 Py_BEGIN_ALLOW_THREADS
811 while (size > INT_MAX) {
812 /* Very long strings cannot be written atomically.
813 * But don't write exactly INT_MAX bytes at a time
814 * to avoid memory aligment issues.
815 */
816 const int chunk_size = INT_MAX & ~0x3FFF;
817 fwrite(data, 1, chunk_size, fp);
818 data += chunk_size;
819 size -= chunk_size;
820 }
821#ifdef __VMS
822 if (size) fwrite(data, (int)size, 1, fp);
823#else
824 fwrite(data, 1, (int)size, fp);
825#endif
826 Py_END_ALLOW_THREADS
827 return 0;
828 }
829
830 /* figure out which quote to use; single is preferred */
831 quote = '\'';
832 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
833 !memchr(op->ob_sval, '"', Py_SIZE(op)))
834 quote = '"';
835
836 str_len = Py_SIZE(op);
837 Py_BEGIN_ALLOW_THREADS
838 fputc(quote, fp);
839 for (i = 0; i < str_len; i++) {
840 /* Since strings are immutable and the caller should have a
841 reference, accessing the interal buffer should not be an issue
842 with the GIL released. */
843 c = op->ob_sval[i];
844 if (c == quote || c == '\\')
845 fprintf(fp, "\\%c", c);
846 else if (c == '\t')
847 fprintf(fp, "\\t");
848 else if (c == '\n')
849 fprintf(fp, "\\n");
850 else if (c == '\r')
851 fprintf(fp, "\\r");
852 else if (c < ' ' || c >= 0x7f)
853 fprintf(fp, "\\x%02x", c & 0xff);
854 else
855 fputc(c, fp);
856 }
857 fputc(quote, fp);
858 Py_END_ALLOW_THREADS
859 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000860}
861
Christian Heimes44720832008-05-26 13:01:01 +0000862PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000863PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000864{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000865 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000866 size_t newsize = 2 + 4 * Py_SIZE(op);
867 PyObject *v;
868 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
869 PyErr_SetString(PyExc_OverflowError,
870 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000871 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000872 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000873 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000874 if (v == NULL) {
875 return NULL;
876 }
877 else {
878 register Py_ssize_t i;
879 register char c;
880 register char *p;
881 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000882
Christian Heimes44720832008-05-26 13:01:01 +0000883 /* figure out which quote to use; single is preferred */
884 quote = '\'';
885 if (smartquotes &&
886 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
887 !memchr(op->ob_sval, '"', Py_SIZE(op)))
888 quote = '"';
889
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000890 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000891 *p++ = quote;
892 for (i = 0; i < Py_SIZE(op); i++) {
893 /* There's at least enough room for a hex escape
894 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000895 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000896 c = op->ob_sval[i];
897 if (c == quote || c == '\\')
898 *p++ = '\\', *p++ = c;
899 else if (c == '\t')
900 *p++ = '\\', *p++ = 't';
901 else if (c == '\n')
902 *p++ = '\\', *p++ = 'n';
903 else if (c == '\r')
904 *p++ = '\\', *p++ = 'r';
905 else if (c < ' ' || c >= 0x7f) {
906 /* For performance, we don't want to call
907 PyOS_snprintf here (extra layers of
908 function call). */
909 sprintf(p, "\\x%02x", c & 0xff);
910 p += 4;
911 }
912 else
913 *p++ = c;
914 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000915 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000916 *p++ = quote;
917 *p = '\0';
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000918 _PyString_Resize(
919 &v, (p - PyString_AS_STRING(v)));
Christian Heimes44720832008-05-26 13:01:01 +0000920 return v;
921 }
922}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923
924static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000925string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000926{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000927 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000928}
929
Christian Heimes1a6387e2008-03-26 12:49:49 +0000930static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000931string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000932{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000933 assert(PyString_Check(s));
934 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000935 Py_INCREF(s);
936 return s;
937 }
938 else {
939 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000940 PyStringObject *t = (PyStringObject *) s;
941 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +0000942 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000943}
944
Christian Heimes44720832008-05-26 13:01:01 +0000945static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000946string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +0000947{
948 return Py_SIZE(a);
949}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000950
Christian Heimes44720832008-05-26 13:01:01 +0000951static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000952string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +0000953{
954 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000955 register PyStringObject *op;
956 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +0000957#ifdef Py_USING_UNICODE
958 if (PyUnicode_Check(bb))
959 return PyUnicode_Concat((PyObject *)a, bb);
960#endif
961 if (PyByteArray_Check(bb))
962 return PyByteArray_Concat((PyObject *)a, bb);
963 PyErr_Format(PyExc_TypeError,
964 "cannot concatenate 'str' and '%.200s' objects",
965 Py_TYPE(bb)->tp_name);
966 return NULL;
967 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000968#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +0000969 /* Optimize cases with empty left or right operand */
970 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000971 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +0000972 if (Py_SIZE(a) == 0) {
973 Py_INCREF(bb);
974 return bb;
975 }
976 Py_INCREF(a);
977 return (PyObject *)a;
978 }
979 size = Py_SIZE(a) + Py_SIZE(b);
980 if (size < 0) {
981 PyErr_SetString(PyExc_OverflowError,
982 "strings are too large to concat");
983 return NULL;
984 }
985
986 /* Inline PyObject_NewVar */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000987 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Christian Heimes44720832008-05-26 13:01:01 +0000988 if (op == NULL)
989 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000990 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000991 op->ob_shash = -1;
992 op->ob_sstate = SSTATE_NOT_INTERNED;
993 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
994 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
995 op->ob_sval[size] = '\0';
996 return (PyObject *) op;
997#undef b
998}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000999
Christian Heimes44720832008-05-26 13:01:01 +00001000static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001001string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001002{
1003 register Py_ssize_t i;
1004 register Py_ssize_t j;
1005 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001006 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001007 size_t nbytes;
1008 if (n < 0)
1009 n = 0;
1010 /* watch out for overflows: the size can overflow int,
1011 * and the # of bytes needed can overflow size_t
1012 */
1013 size = Py_SIZE(a) * n;
1014 if (n && size / n != Py_SIZE(a)) {
1015 PyErr_SetString(PyExc_OverflowError,
1016 "repeated string is too long");
1017 return NULL;
1018 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001019 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001020 Py_INCREF(a);
1021 return (PyObject *)a;
1022 }
1023 nbytes = (size_t)size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001024 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001025 PyErr_SetString(PyExc_OverflowError,
1026 "repeated string is too long");
1027 return NULL;
1028 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001029 op = (PyStringObject *)
1030 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001031 if (op == NULL)
1032 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001033 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001034 op->ob_shash = -1;
1035 op->ob_sstate = SSTATE_NOT_INTERNED;
1036 op->ob_sval[size] = '\0';
1037 if (Py_SIZE(a) == 1 && n > 0) {
1038 memset(op->ob_sval, a->ob_sval[0] , n);
1039 return (PyObject *) op;
1040 }
1041 i = 0;
1042 if (i < size) {
1043 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1044 i = Py_SIZE(a);
1045 }
1046 while (i < size) {
1047 j = (i <= size-i) ? i : size-i;
1048 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1049 i += j;
1050 }
1051 return (PyObject *) op;
1052}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001053
Christian Heimes44720832008-05-26 13:01:01 +00001054/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1055
1056static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001057string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001058 register Py_ssize_t j)
1059 /* j -- may be negative! */
1060{
1061 if (i < 0)
1062 i = 0;
1063 if (j < 0)
1064 j = 0; /* Avoid signed/unsigned bug in next line */
1065 if (j > Py_SIZE(a))
1066 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001067 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001068 /* It's the same as a */
1069 Py_INCREF(a);
1070 return (PyObject *)a;
1071 }
1072 if (j < i)
1073 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001074 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001075}
1076
1077static int
1078string_contains(PyObject *str_obj, PyObject *sub_obj)
1079{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001080 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001081#ifdef Py_USING_UNICODE
1082 if (PyUnicode_Check(sub_obj))
1083 return PyUnicode_Contains(str_obj, sub_obj);
1084#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001085 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001086 PyErr_Format(PyExc_TypeError,
1087 "'in <string>' requires string as left operand, "
1088 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1089 return -1;
1090 }
1091 }
1092
1093 return stringlib_contains_obj(str_obj, sub_obj);
1094}
1095
1096static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001097string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001098{
1099 char pchar;
1100 PyObject *v;
1101 if (i < 0 || i >= Py_SIZE(a)) {
1102 PyErr_SetString(PyExc_IndexError, "string index out of range");
1103 return NULL;
1104 }
1105 pchar = a->ob_sval[i];
1106 v = (PyObject *)characters[pchar & UCHAR_MAX];
1107 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001108 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001109 else {
1110#ifdef COUNT_ALLOCS
1111 one_strings++;
1112#endif
1113 Py_INCREF(v);
1114 }
1115 return v;
1116}
1117
1118static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001119string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001120{
1121 int c;
1122 Py_ssize_t len_a, len_b;
1123 Py_ssize_t min_len;
1124 PyObject *result;
1125
1126 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001127 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001128 result = Py_NotImplemented;
1129 goto out;
1130 }
1131 if (a == b) {
1132 switch (op) {
1133 case Py_EQ:case Py_LE:case Py_GE:
1134 result = Py_True;
1135 goto out;
1136 case Py_NE:case Py_LT:case Py_GT:
1137 result = Py_False;
1138 goto out;
1139 }
1140 }
1141 if (op == Py_EQ) {
1142 /* Supporting Py_NE here as well does not save
1143 much time, since Py_NE is rarely used. */
1144 if (Py_SIZE(a) == Py_SIZE(b)
1145 && (a->ob_sval[0] == b->ob_sval[0]
1146 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1147 result = Py_True;
1148 } else {
1149 result = Py_False;
1150 }
1151 goto out;
1152 }
1153 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1154 min_len = (len_a < len_b) ? len_a : len_b;
1155 if (min_len > 0) {
1156 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1157 if (c==0)
1158 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1159 } else
1160 c = 0;
1161 if (c == 0)
1162 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1163 switch (op) {
1164 case Py_LT: c = c < 0; break;
1165 case Py_LE: c = c <= 0; break;
1166 case Py_EQ: assert(0); break; /* unreachable */
1167 case Py_NE: c = c != 0; break;
1168 case Py_GT: c = c > 0; break;
1169 case Py_GE: c = c >= 0; break;
1170 default:
1171 result = Py_NotImplemented;
1172 goto out;
1173 }
1174 result = c ? Py_True : Py_False;
1175 out:
1176 Py_INCREF(result);
1177 return result;
1178}
1179
1180int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001181_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001182{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001183 PyStringObject *a = (PyStringObject*) o1;
1184 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001185 return Py_SIZE(a) == Py_SIZE(b)
1186 && *a->ob_sval == *b->ob_sval
1187 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1188}
1189
1190static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001191string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001192{
1193 register Py_ssize_t len;
1194 register unsigned char *p;
1195 register long x;
1196
1197 if (a->ob_shash != -1)
1198 return a->ob_shash;
1199 len = Py_SIZE(a);
1200 p = (unsigned char *) a->ob_sval;
1201 x = *p << 7;
1202 while (--len >= 0)
1203 x = (1000003*x) ^ *p++;
1204 x ^= Py_SIZE(a);
1205 if (x == -1)
1206 x = -2;
1207 a->ob_shash = x;
1208 return x;
1209}
1210
1211static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001212string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001213{
1214 if (PyIndex_Check(item)) {
1215 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1216 if (i == -1 && PyErr_Occurred())
1217 return NULL;
1218 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001219 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001220 return string_item(self, i);
1221 }
1222 else if (PySlice_Check(item)) {
1223 Py_ssize_t start, stop, step, slicelength, cur, i;
1224 char* source_buf;
1225 char* result_buf;
1226 PyObject* result;
1227
1228 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001229 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001230 &start, &stop, &step, &slicelength) < 0) {
1231 return NULL;
1232 }
1233
1234 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001235 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001236 }
1237 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001238 slicelength == PyString_GET_SIZE(self) &&
1239 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001240 Py_INCREF(self);
1241 return (PyObject *)self;
1242 }
1243 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001244 return PyString_FromStringAndSize(
1245 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001246 slicelength);
1247 }
1248 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001249 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001250 result_buf = (char *)PyMem_Malloc(slicelength);
1251 if (result_buf == NULL)
1252 return PyErr_NoMemory();
1253
1254 for (cur = start, i = 0; i < slicelength;
1255 cur += step, i++) {
1256 result_buf[i] = source_buf[cur];
1257 }
1258
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001259 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001260 slicelength);
1261 PyMem_Free(result_buf);
1262 return result;
1263 }
1264 }
1265 else {
1266 PyErr_Format(PyExc_TypeError,
1267 "string indices must be integers, not %.200s",
1268 Py_TYPE(item)->tp_name);
1269 return NULL;
1270 }
1271}
1272
1273static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001274string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001275{
1276 if ( index != 0 ) {
1277 PyErr_SetString(PyExc_SystemError,
1278 "accessing non-existent string segment");
1279 return -1;
1280 }
1281 *ptr = (void *)self->ob_sval;
1282 return Py_SIZE(self);
1283}
1284
1285static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001286string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001287{
1288 PyErr_SetString(PyExc_TypeError,
1289 "Cannot use string as modifiable buffer");
1290 return -1;
1291}
1292
1293static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001294string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001295{
1296 if ( lenp )
1297 *lenp = Py_SIZE(self);
1298 return 1;
1299}
1300
1301static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001302string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001303{
1304 if ( index != 0 ) {
1305 PyErr_SetString(PyExc_SystemError,
1306 "accessing non-existent string segment");
1307 return -1;
1308 }
1309 *ptr = self->ob_sval;
1310 return Py_SIZE(self);
1311}
1312
1313static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001314string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001315{
1316 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1317 0, flags);
1318}
1319
1320static PySequenceMethods string_as_sequence = {
1321 (lenfunc)string_length, /*sq_length*/
1322 (binaryfunc)string_concat, /*sq_concat*/
1323 (ssizeargfunc)string_repeat, /*sq_repeat*/
1324 (ssizeargfunc)string_item, /*sq_item*/
1325 (ssizessizeargfunc)string_slice, /*sq_slice*/
1326 0, /*sq_ass_item*/
1327 0, /*sq_ass_slice*/
1328 (objobjproc)string_contains /*sq_contains*/
1329};
1330
1331static PyMappingMethods string_as_mapping = {
1332 (lenfunc)string_length,
1333 (binaryfunc)string_subscript,
1334 0,
1335};
1336
1337static PyBufferProcs string_as_buffer = {
1338 (readbufferproc)string_buffer_getreadbuf,
1339 (writebufferproc)string_buffer_getwritebuf,
1340 (segcountproc)string_buffer_getsegcount,
1341 (charbufferproc)string_buffer_getcharbuf,
1342 (getbufferproc)string_buffer_getbuffer,
1343 0, /* XXX */
1344};
1345
1346
1347
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name for strip kind i: skips the three-character "|O:"
   prefix of the corresponding stripformat entry. */
#define STRIPNAME(i) (stripformat[(i)]+3)


/* True when `length` bytes of target starting at offset equal pattern.
   The first and last bytes are compared directly before memcmp checks
   the interior.
   Don't call if length < 2: the interior length passed to memcmp is
   length-2.
   Every argument is parenthesized so that expression arguments (for
   example `buf + 1` or `a ? b : c`) expand with the intended grouping. */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements.
   The argument is parenthesized so that an expression argument is
   compared and incremented as a whole. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001377
/* Append the substring data[left:right] to `list`.
   Expands to several statements and relies on names from the calling
   function: `str` (scratch PyObject *), `list`, and an `onError` label
   that is jumped to when creating or appending the substring fails.
   The statement shape (trailing if/else, no enclosing braces) is kept
   as-is because call sites are written against it. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) != 0) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but additionally uses the caller's `count`: while
   count is below MAX_PREALLOC the substring is stored directly into slot
   `count` of the preallocated list, otherwise it is appended.  `count`
   is incremented after a successful add. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str) != 0) {                    \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1409
/* Cursor helpers for whitespace splitting.  `i` must be a modifiable
   index variable; it is advanced (SKIP_*) or moved backwards (RSKIP_*)
   while s[i] is / is not whitespace according to isspace().  The forward
   forms stop at len, the reverse forms stop once i drops below 0.
   Arguments are parenthesized so expression arguments group correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001414
1415Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001416split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001417{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001418 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001419 Py_ssize_t i, j, count=0;
1420 PyObject *str;
1421 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001422
Christian Heimes44720832008-05-26 13:01:01 +00001423 if (list == NULL)
1424 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001425
Christian Heimes44720832008-05-26 13:01:01 +00001426 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001427
Christian Heimes44720832008-05-26 13:01:01 +00001428 while (maxsplit-- > 0) {
1429 SKIP_SPACE(s, i, len);
1430 if (i==len) break;
1431 j = i; i++;
1432 SKIP_NONSPACE(s, i, len);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001433 if (j == 0 && i == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001434 /* No whitespace in self, so just use it as list[0] */
1435 Py_INCREF(self);
1436 PyList_SET_ITEM(list, 0, (PyObject *)self);
1437 count++;
1438 break;
1439 }
1440 SPLIT_ADD(s, j, i);
1441 }
1442
1443 if (i < len) {
1444 /* Only occurs when maxsplit was reached */
1445 /* Skip any remaining whitespace and copy to end of string */
1446 SKIP_SPACE(s, i, len);
1447 if (i != len)
1448 SPLIT_ADD(s, i, len);
1449 }
1450 FIX_PREALLOC_SIZE(list);
1451 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001452 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001453 Py_DECREF(list);
1454 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001455}
1456
Christian Heimes1a6387e2008-03-26 12:49:49 +00001457Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001458split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001459{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001460 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001461 register Py_ssize_t i, j, count=0;
1462 PyObject *str;
1463 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001464
Christian Heimes44720832008-05-26 13:01:01 +00001465 if (list == NULL)
1466 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001467
Christian Heimes44720832008-05-26 13:01:01 +00001468 i = j = 0;
1469 while ((j < len) && (maxcount-- > 0)) {
1470 for(; j<len; j++) {
1471 /* I found that using memchr makes no difference */
1472 if (s[j] == ch) {
1473 SPLIT_ADD(s, i, j);
1474 i = j = j + 1;
1475 break;
1476 }
1477 }
1478 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001479 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001480 /* ch not in self, so just use self as list[0] */
1481 Py_INCREF(self);
1482 PyList_SET_ITEM(list, 0, (PyObject *)self);
1483 count++;
1484 }
1485 else if (i <= len) {
1486 SPLIT_ADD(s, i, len);
1487 }
1488 FIX_PREALLOC_SIZE(list);
1489 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001490
1491 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001492 Py_DECREF(list);
1493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001494}
1495
1496PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001497"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001498\n\
Christian Heimes44720832008-05-26 13:01:01 +00001499Return a list of the words in the string S, using sep as the\n\
1500delimiter string. If maxsplit is given, at most maxsplit\n\
1501splits are done. If sep is not specified or is None, any\n\
1502whitespace string is a separator and empty strings are removed\n\
1503from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001504
1505static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001506string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001507{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001508 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001509 Py_ssize_t maxsplit = -1, count=0;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001510 const char *s = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00001511 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001512#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001513 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001514#endif
1515
Christian Heimes44720832008-05-26 13:01:01 +00001516 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1517 return NULL;
1518 if (maxsplit < 0)
1519 maxsplit = PY_SSIZE_T_MAX;
1520 if (subobj == Py_None)
1521 return split_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001522 if (PyString_Check(subobj)) {
1523 sub = PyString_AS_STRING(subobj);
1524 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001525 }
1526#ifdef Py_USING_UNICODE
1527 else if (PyUnicode_Check(subobj))
1528 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1529#endif
1530 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1531 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001532
Christian Heimes44720832008-05-26 13:01:01 +00001533 if (n == 0) {
1534 PyErr_SetString(PyExc_ValueError, "empty separator");
1535 return NULL;
1536 }
1537 else if (n == 1)
1538 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001539
Christian Heimes44720832008-05-26 13:01:01 +00001540 list = PyList_New(PREALLOC_SIZE(maxsplit));
1541 if (list == NULL)
1542 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001543
1544#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001545 i = j = 0;
1546 while (maxsplit-- > 0) {
1547 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1548 if (pos < 0)
1549 break;
1550 j = i+pos;
1551 SPLIT_ADD(s, i, j);
1552 i = j + n;
1553 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001554#else
Christian Heimes44720832008-05-26 13:01:01 +00001555 i = j = 0;
1556 while ((j+n <= len) && (maxsplit-- > 0)) {
1557 for (; j+n <= len; j++) {
1558 if (Py_STRING_MATCH(s, j, sub, n)) {
1559 SPLIT_ADD(s, i, j);
1560 i = j = j + n;
1561 break;
1562 }
1563 }
1564 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001565#endif
Christian Heimes44720832008-05-26 13:01:01 +00001566 SPLIT_ADD(s, i, len);
1567 FIX_PREALLOC_SIZE(list);
1568 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001569
Christian Heimes44720832008-05-26 13:01:01 +00001570 onError:
1571 Py_DECREF(list);
1572 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001573}
1574
1575PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001576"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001577\n\
Christian Heimes44720832008-05-26 13:01:01 +00001578Searches for the separator sep in S, and returns the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001579the separator itself, and the part after it. If the separator is not\n\
Christian Heimes44720832008-05-26 13:01:01 +00001580found, returns S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001581
1582static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001583string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001584{
Christian Heimes44720832008-05-26 13:01:01 +00001585 const char *sep;
1586 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001587
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001588 if (PyString_Check(sep_obj)) {
1589 sep = PyString_AS_STRING(sep_obj);
1590 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001591 }
1592#ifdef Py_USING_UNICODE
1593 else if (PyUnicode_Check(sep_obj))
1594 return PyUnicode_Partition((PyObject *) self, sep_obj);
1595#endif
1596 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1597 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001598
Christian Heimes44720832008-05-26 13:01:01 +00001599 return stringlib_partition(
1600 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001601 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001602 sep_obj, sep, sep_len
1603 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001604}
1605
1606PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001607"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001608\n\
Christian Heimes44720832008-05-26 13:01:01 +00001609Searches for the separator sep in S, starting at the end of S, and returns\n\
1610the part before it, the separator itself, and the part after it. If the\n\
1611separator is not found, returns two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001612
1613static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001614string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001615{
Christian Heimes44720832008-05-26 13:01:01 +00001616 const char *sep;
1617 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001618
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001619 if (PyString_Check(sep_obj)) {
1620 sep = PyString_AS_STRING(sep_obj);
1621 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001622 }
1623#ifdef Py_USING_UNICODE
1624 else if (PyUnicode_Check(sep_obj))
1625 return PyUnicode_Partition((PyObject *) self, sep_obj);
1626#endif
1627 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1628 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001629
Christian Heimes44720832008-05-26 13:01:01 +00001630 return stringlib_rpartition(
1631 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001632 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001633 sep_obj, sep, sep_len
1634 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001635}
1636
1637Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001638rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001639{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001640 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001641 Py_ssize_t i, j, count=0;
1642 PyObject *str;
1643 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001644
Christian Heimes44720832008-05-26 13:01:01 +00001645 if (list == NULL)
1646 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001647
Christian Heimes44720832008-05-26 13:01:01 +00001648 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001649
Christian Heimes44720832008-05-26 13:01:01 +00001650 while (maxsplit-- > 0) {
1651 RSKIP_SPACE(s, i);
1652 if (i<0) break;
1653 j = i; i--;
1654 RSKIP_NONSPACE(s, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001655 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001656 /* No whitespace in self, so just use it as list[0] */
1657 Py_INCREF(self);
1658 PyList_SET_ITEM(list, 0, (PyObject *)self);
1659 count++;
1660 break;
1661 }
1662 SPLIT_ADD(s, i + 1, j + 1);
1663 }
1664 if (i >= 0) {
1665 /* Only occurs when maxsplit was reached */
1666 /* Skip any remaining whitespace and copy to beginning of string */
1667 RSKIP_SPACE(s, i);
1668 if (i >= 0)
1669 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001670
Christian Heimes44720832008-05-26 13:01:01 +00001671 }
1672 FIX_PREALLOC_SIZE(list);
1673 if (PyList_Reverse(list) < 0)
1674 goto onError;
1675 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001676 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001677 Py_DECREF(list);
1678 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001679}
1680
1681Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001682rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001683{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001684 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001685 register Py_ssize_t i, j, count=0;
1686 PyObject *str;
1687 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001688
Christian Heimes44720832008-05-26 13:01:01 +00001689 if (list == NULL)
1690 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001691
Christian Heimes44720832008-05-26 13:01:01 +00001692 i = j = len - 1;
1693 while ((i >= 0) && (maxcount-- > 0)) {
1694 for (; i >= 0; i--) {
1695 if (s[i] == ch) {
1696 SPLIT_ADD(s, i + 1, j + 1);
1697 j = i = i - 1;
1698 break;
1699 }
1700 }
1701 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001702 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001703 /* ch not in self, so just use self as list[0] */
1704 Py_INCREF(self);
1705 PyList_SET_ITEM(list, 0, (PyObject *)self);
1706 count++;
1707 }
1708 else if (j >= -1) {
1709 SPLIT_ADD(s, 0, j + 1);
1710 }
1711 FIX_PREALLOC_SIZE(list);
1712 if (PyList_Reverse(list) < 0)
1713 goto onError;
1714 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001715
Christian Heimes44720832008-05-26 13:01:01 +00001716 onError:
1717 Py_DECREF(list);
1718 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001719}
1720
1721PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001722"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001723\n\
Christian Heimes44720832008-05-26 13:01:01 +00001724Return a list of the words in the string S, using sep as the\n\
1725delimiter string, starting at the end of the string and working\n\
1726to the front. If maxsplit is given, at most maxsplit splits are\n\
1727done. If sep is not specified or is None, any whitespace string\n\
1728is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001729
1730static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001731string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001732{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001733 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001734 Py_ssize_t maxsplit = -1, count=0;
1735 const char *s, *sub;
1736 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001737
Christian Heimes44720832008-05-26 13:01:01 +00001738 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1739 return NULL;
1740 if (maxsplit < 0)
1741 maxsplit = PY_SSIZE_T_MAX;
1742 if (subobj == Py_None)
1743 return rsplit_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001744 if (PyString_Check(subobj)) {
1745 sub = PyString_AS_STRING(subobj);
1746 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001747 }
1748#ifdef Py_USING_UNICODE
1749 else if (PyUnicode_Check(subobj))
1750 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1751#endif
1752 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1753 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001754
Christian Heimes44720832008-05-26 13:01:01 +00001755 if (n == 0) {
1756 PyErr_SetString(PyExc_ValueError, "empty separator");
1757 return NULL;
1758 }
1759 else if (n == 1)
1760 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001761
Christian Heimes44720832008-05-26 13:01:01 +00001762 list = PyList_New(PREALLOC_SIZE(maxsplit));
1763 if (list == NULL)
1764 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001765
Christian Heimes44720832008-05-26 13:01:01 +00001766 j = len;
1767 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001768
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001769 s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001770 while ( (i >= 0) && (maxsplit-- > 0) ) {
1771 for (; i>=0; i--) {
1772 if (Py_STRING_MATCH(s, i, sub, n)) {
1773 SPLIT_ADD(s, i + n, j);
1774 j = i;
1775 i -= n;
1776 break;
1777 }
1778 }
1779 }
1780 SPLIT_ADD(s, 0, j);
1781 FIX_PREALLOC_SIZE(list);
1782 if (PyList_Reverse(list) < 0)
1783 goto onError;
1784 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001785
1786onError:
Christian Heimes44720832008-05-26 13:01:01 +00001787 Py_DECREF(list);
1788 return NULL;
1789}
1790
1791
1792PyDoc_STRVAR(join__doc__,
1793"S.join(sequence) -> string\n\
1794\n\
1795Return a string which is the concatenation of the strings in the\n\
1796sequence. The separator between elements is S.");
1797
1798static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001799string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001800{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001801 char *sep = PyString_AS_STRING(self);
1802 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001803 PyObject *res = NULL;
1804 char *p;
1805 Py_ssize_t seqlen = 0;
1806 size_t sz = 0;
1807 Py_ssize_t i;
1808 PyObject *seq, *item;
1809
1810 seq = PySequence_Fast(orig, "");
1811 if (seq == NULL) {
1812 return NULL;
1813 }
1814
1815 seqlen = PySequence_Size(seq);
1816 if (seqlen == 0) {
1817 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001818 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001819 }
1820 if (seqlen == 1) {
1821 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001822 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001823 Py_INCREF(item);
1824 Py_DECREF(seq);
1825 return item;
1826 }
1827 }
1828
1829 /* There are at least two things to join, or else we have a subclass
1830 * of the builtin types in the sequence.
1831 * Do a pre-pass to figure out the total amount of space we'll
1832 * need (sz), see whether any argument is absurd, and defer to
1833 * the Unicode join if appropriate.
1834 */
1835 for (i = 0; i < seqlen; i++) {
1836 const size_t old_sz = sz;
1837 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001838 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001839#ifdef Py_USING_UNICODE
1840 if (PyUnicode_Check(item)) {
1841 /* Defer to Unicode join.
1842 * CAUTION: There's no gurantee that the
1843 * original sequence can be iterated over
1844 * again, so we must pass seq here.
1845 */
1846 PyObject *result;
1847 result = PyUnicode_Join((PyObject *)self, seq);
1848 Py_DECREF(seq);
1849 return result;
1850 }
1851#endif
1852 PyErr_Format(PyExc_TypeError,
1853 "sequence item %zd: expected string,"
1854 " %.80s found",
1855 i, Py_TYPE(item)->tp_name);
1856 Py_DECREF(seq);
1857 return NULL;
1858 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001859 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001860 if (i != 0)
1861 sz += seplen;
1862 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1863 PyErr_SetString(PyExc_OverflowError,
1864 "join() result is too long for a Python string");
1865 Py_DECREF(seq);
1866 return NULL;
1867 }
1868 }
1869
1870 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001871 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001872 if (res == NULL) {
1873 Py_DECREF(seq);
1874 return NULL;
1875 }
1876
1877 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001878 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001879 for (i = 0; i < seqlen; ++i) {
1880 size_t n;
1881 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001882 n = PyString_GET_SIZE(item);
1883 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001884 p += n;
1885 if (i < seqlen - 1) {
1886 Py_MEMCPY(p, sep, seplen);
1887 p += seplen;
1888 }
1889 }
1890
1891 Py_DECREF(seq);
1892 return res;
1893}
1894
1895PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001896_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001897{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001898 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001899 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001900 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001901}
1902
1903Py_LOCAL_INLINE(void)
1904string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1905{
1906 if (*end > len)
1907 *end = len;
1908 else if (*end < 0)
1909 *end += len;
1910 if (*end < 0)
1911 *end = 0;
1912 if (*start < 0)
1913 *start += len;
1914 if (*start < 0)
1915 *start = 0;
1916}
1917
1918Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001919string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001920{
1921 PyObject *subobj;
1922 const char *sub;
1923 Py_ssize_t sub_len;
1924 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1925 PyObject *obj_start=Py_None, *obj_end=Py_None;
1926
1927 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1928 &obj_start, &obj_end))
1929 return -2;
1930 /* To support None in "start" and "end" arguments, meaning
1931 the same as if they were not passed.
1932 */
1933 if (obj_start != Py_None)
1934 if (!_PyEval_SliceIndex(obj_start, &start))
1935 return -2;
1936 if (obj_end != Py_None)
1937 if (!_PyEval_SliceIndex(obj_end, &end))
1938 return -2;
1939
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001940 if (PyString_Check(subobj)) {
1941 sub = PyString_AS_STRING(subobj);
1942 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001943 }
1944#ifdef Py_USING_UNICODE
1945 else if (PyUnicode_Check(subobj))
1946 return PyUnicode_Find(
1947 (PyObject *)self, subobj, start, end, dir);
1948#endif
1949 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1950 /* XXX - the "expected a character buffer object" is pretty
1951 confusing for a non-expert. remap to something else ? */
1952 return -2;
1953
1954 if (dir > 0)
1955 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001956 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001957 sub, sub_len, start, end);
1958 else
1959 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001960 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001961 sub, sub_len, start, end);
1962}
1963
1964
1965PyDoc_STRVAR(find__doc__,
1966"S.find(sub [,start [,end]]) -> int\n\
1967\n\
1968Return the lowest index in S where substring sub is found,\n\
1969such that sub is contained within s[start:end]. Optional\n\
1970arguments start and end are interpreted as in slice notation.\n\
1971\n\
1972Return -1 on failure.");
1973
1974static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001975string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001976{
1977 Py_ssize_t result = string_find_internal(self, args, +1);
1978 if (result == -2)
1979 return NULL;
1980 return PyInt_FromSsize_t(result);
1981}
1982
1983
1984PyDoc_STRVAR(index__doc__,
1985"S.index(sub [,start [,end]]) -> int\n\
1986\n\
1987Like S.find() but raise ValueError when the substring is not found.");
1988
1989static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001990string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001991{
1992 Py_ssize_t result = string_find_internal(self, args, +1);
1993 if (result == -2)
1994 return NULL;
1995 if (result == -1) {
1996 PyErr_SetString(PyExc_ValueError,
1997 "substring not found");
1998 return NULL;
1999 }
2000 return PyInt_FromSsize_t(result);
2001}
2002
2003
2004PyDoc_STRVAR(rfind__doc__,
2005"S.rfind(sub [,start [,end]]) -> int\n\
2006\n\
2007Return the highest index in S where substring sub is found,\n\
2008such that sub is contained within s[start:end]. Optional\n\
2009arguments start and end are interpreted as in slice notation.\n\
2010\n\
2011Return -1 on failure.");
2012
2013static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002014string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002015{
2016 Py_ssize_t result = string_find_internal(self, args, -1);
2017 if (result == -2)
2018 return NULL;
2019 return PyInt_FromSsize_t(result);
2020}
2021
2022
2023PyDoc_STRVAR(rindex__doc__,
2024"S.rindex(sub [,start [,end]]) -> int\n\
2025\n\
2026Like S.rfind() but raise ValueError when the substring is not found.");
2027
2028static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002029string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002030{
2031 Py_ssize_t result = string_find_internal(self, args, -1);
2032 if (result == -2)
2033 return NULL;
2034 if (result == -1) {
2035 PyErr_SetString(PyExc_ValueError,
2036 "substring not found");
2037 return NULL;
2038 }
2039 return PyInt_FromSsize_t(result);
2040}
2041
2042
2043Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002044do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002045{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002046 char *s = PyString_AS_STRING(self);
2047 Py_ssize_t len = PyString_GET_SIZE(self);
2048 char *sep = PyString_AS_STRING(sepobj);
2049 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00002050 Py_ssize_t i, j;
2051
2052 i = 0;
2053 if (striptype != RIGHTSTRIP) {
2054 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2055 i++;
2056 }
2057 }
2058
2059 j = len;
2060 if (striptype != LEFTSTRIP) {
2061 do {
2062 j--;
2063 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2064 j++;
2065 }
2066
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002067 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002068 Py_INCREF(self);
2069 return (PyObject*)self;
2070 }
2071 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002072 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002073}
2074
2075
2076Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002077do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002078{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002079 char *s = PyString_AS_STRING(self);
2080 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002081
2082 i = 0;
2083 if (striptype != RIGHTSTRIP) {
2084 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2085 i++;
2086 }
2087 }
2088
2089 j = len;
2090 if (striptype != LEFTSTRIP) {
2091 do {
2092 j--;
2093 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2094 j++;
2095 }
2096
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002097 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002098 Py_INCREF(self);
2099 return (PyObject*)self;
2100 }
2101 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002102 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002103}
2104
2105
2106Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002107do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002108{
2109 PyObject *sep = NULL;
2110
2111 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2112 return NULL;
2113
2114 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002115 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00002116 return do_xstrip(self, striptype, sep);
2117#ifdef Py_USING_UNICODE
2118 else if (PyUnicode_Check(sep)) {
2119 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2120 PyObject *res;
2121 if (uniself==NULL)
2122 return NULL;
2123 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2124 striptype, sep);
2125 Py_DECREF(uniself);
2126 return res;
2127 }
2128#endif
2129 PyErr_Format(PyExc_TypeError,
2130#ifdef Py_USING_UNICODE
2131 "%s arg must be None, str or unicode",
2132#else
2133 "%s arg must be None or str",
2134#endif
2135 STRIPNAME(striptype));
2136 return NULL;
2137 }
2138
2139 return do_strip(self, striptype);
2140}
2141
2142
2143PyDoc_STRVAR(strip__doc__,
2144"S.strip([chars]) -> string or unicode\n\
2145\n\
2146Return a copy of the string S with leading and trailing\n\
2147whitespace removed.\n\
2148If chars is given and not None, remove characters in chars instead.\n\
2149If chars is unicode, S will be converted to unicode before stripping");
2150
2151static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002152string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002153{
2154 if (PyTuple_GET_SIZE(args) == 0)
2155 return do_strip(self, BOTHSTRIP); /* Common case */
2156 else
2157 return do_argstrip(self, BOTHSTRIP, args);
2158}
2159
2160
2161PyDoc_STRVAR(lstrip__doc__,
2162"S.lstrip([chars]) -> string or unicode\n\
2163\n\
2164Return a copy of the string S with leading whitespace removed.\n\
2165If chars is given and not None, remove characters in chars instead.\n\
2166If chars is unicode, S will be converted to unicode before stripping");
2167
2168static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002169string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002170{
2171 if (PyTuple_GET_SIZE(args) == 0)
2172 return do_strip(self, LEFTSTRIP); /* Common case */
2173 else
2174 return do_argstrip(self, LEFTSTRIP, args);
2175}
2176
2177
2178PyDoc_STRVAR(rstrip__doc__,
2179"S.rstrip([chars]) -> string or unicode\n\
2180\n\
2181Return a copy of the string S with trailing whitespace removed.\n\
2182If chars is given and not None, remove characters in chars instead.\n\
2183If chars is unicode, S will be converted to unicode before stripping");
2184
2185static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002186string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002187{
2188 if (PyTuple_GET_SIZE(args) == 0)
2189 return do_strip(self, RIGHTSTRIP); /* Common case */
2190 else
2191 return do_argstrip(self, RIGHTSTRIP, args);
2192}
2193
2194
2195PyDoc_STRVAR(lower__doc__,
2196"S.lower() -> string\n\
2197\n\
2198Return a copy of the string S converted to lowercase.");
2199
2200/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2201#ifndef _tolower
2202#define _tolower tolower
2203#endif
2204
2205static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002206string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002207{
2208 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002209 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002210 PyObject *newobj;
2211
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002212 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002213 if (!newobj)
2214 return NULL;
2215
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002216 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002217
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002218 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002219
2220 for (i = 0; i < n; i++) {
2221 int c = Py_CHARMASK(s[i]);
2222 if (isupper(c))
2223 s[i] = _tolower(c);
2224 }
2225
2226 return newobj;
2227}
2228
2229PyDoc_STRVAR(upper__doc__,
2230"S.upper() -> string\n\
2231\n\
2232Return a copy of the string S converted to uppercase.");
2233
2234#ifndef _toupper
2235#define _toupper toupper
2236#endif
2237
2238static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002239string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002240{
2241 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002242 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002243 PyObject *newobj;
2244
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002245 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002246 if (!newobj)
2247 return NULL;
2248
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002249 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002250
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002251 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002252
2253 for (i = 0; i < n; i++) {
2254 int c = Py_CHARMASK(s[i]);
2255 if (islower(c))
2256 s[i] = _toupper(c);
2257 }
2258
2259 return newobj;
2260}
2261
2262PyDoc_STRVAR(title__doc__,
2263"S.title() -> string\n\
2264\n\
2265Return a titlecased version of S, i.e. words start with uppercase\n\
2266characters, all remaining cased characters have lowercase.");
2267
2268static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002269string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002270{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002271 char *s = PyString_AS_STRING(self), *s_new;
2272 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002273 int previous_is_cased = 0;
2274 PyObject *newobj;
2275
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002276 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002277 if (newobj == NULL)
2278 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002279 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002280 for (i = 0; i < n; i++) {
2281 int c = Py_CHARMASK(*s++);
2282 if (islower(c)) {
2283 if (!previous_is_cased)
2284 c = toupper(c);
2285 previous_is_cased = 1;
2286 } else if (isupper(c)) {
2287 if (previous_is_cased)
2288 c = tolower(c);
2289 previous_is_cased = 1;
2290 } else
2291 previous_is_cased = 0;
2292 *s_new++ = c;
2293 }
2294 return newobj;
2295}
2296
2297PyDoc_STRVAR(capitalize__doc__,
2298"S.capitalize() -> string\n\
2299\n\
2300Return a copy of the string S with only its first character\n\
2301capitalized.");
2302
2303static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002304string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002305{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002306 char *s = PyString_AS_STRING(self), *s_new;
2307 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002308 PyObject *newobj;
2309
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002310 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002311 if (newobj == NULL)
2312 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002313 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002314 if (0 < n) {
2315 int c = Py_CHARMASK(*s++);
2316 if (islower(c))
2317 *s_new = toupper(c);
2318 else
2319 *s_new = c;
2320 s_new++;
2321 }
2322 for (i = 1; i < n; i++) {
2323 int c = Py_CHARMASK(*s++);
2324 if (isupper(c))
2325 *s_new = tolower(c);
2326 else
2327 *s_new = c;
2328 s_new++;
2329 }
2330 return newobj;
2331}
2332
2333
2334PyDoc_STRVAR(count__doc__,
2335"S.count(sub[, start[, end]]) -> int\n\
2336\n\
2337Return the number of non-overlapping occurrences of substring sub in\n\
2338string S[start:end]. Optional arguments start and end are interpreted\n\
2339as in slice notation.");
2340
2341static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002342string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002343{
2344 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002345 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002346 Py_ssize_t sub_len;
2347 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2348
2349 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2350 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2351 return NULL;
2352
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002353 if (PyString_Check(sub_obj)) {
2354 sub = PyString_AS_STRING(sub_obj);
2355 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002356 }
2357#ifdef Py_USING_UNICODE
2358 else if (PyUnicode_Check(sub_obj)) {
2359 Py_ssize_t count;
2360 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2361 if (count == -1)
2362 return NULL;
2363 else
2364 return PyInt_FromSsize_t(count);
2365 }
2366#endif
2367 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2368 return NULL;
2369
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002370 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002371
2372 return PyInt_FromSsize_t(
2373 stringlib_count(str + start, end - start, sub, sub_len)
2374 );
2375}
2376
2377PyDoc_STRVAR(swapcase__doc__,
2378"S.swapcase() -> string\n\
2379\n\
2380Return a copy of the string S with uppercase characters\n\
2381converted to lowercase and vice versa.");
2382
2383static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002384string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002385{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002386 char *s = PyString_AS_STRING(self), *s_new;
2387 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002388 PyObject *newobj;
2389
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002390 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002391 if (newobj == NULL)
2392 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002393 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002394 for (i = 0; i < n; i++) {
2395 int c = Py_CHARMASK(*s++);
2396 if (islower(c)) {
2397 *s_new = toupper(c);
2398 }
2399 else if (isupper(c)) {
2400 *s_new = tolower(c);
2401 }
2402 else
2403 *s_new = c;
2404 s_new++;
2405 }
2406 return newobj;
2407}
2408
2409
2410PyDoc_STRVAR(translate__doc__,
2411"S.translate(table [,deletechars]) -> string\n\
2412\n\
2413Return a copy of the string S, where all characters occurring\n\
2414in the optional argument deletechars are removed, and the\n\
2415remaining characters have been mapped through the given\n\
2416translation table, which must be a string of length 256.");
2417
2418static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002419string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002420{
2421 register char *input, *output;
2422 const char *table;
2423 register Py_ssize_t i, c, changed = 0;
2424 PyObject *input_obj = (PyObject*)self;
2425 const char *output_start, *del_table=NULL;
2426 Py_ssize_t inlen, tablen, dellen = 0;
2427 PyObject *result;
2428 int trans_table[256];
2429 PyObject *tableobj, *delobj = NULL;
2430
2431 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2432 &tableobj, &delobj))
2433 return NULL;
2434
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002435 if (PyString_Check(tableobj)) {
2436 table = PyString_AS_STRING(tableobj);
2437 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002438 }
2439 else if (tableobj == Py_None) {
2440 table = NULL;
2441 tablen = 256;
2442 }
2443#ifdef Py_USING_UNICODE
2444 else if (PyUnicode_Check(tableobj)) {
2445 /* Unicode .translate() does not support the deletechars
2446 parameter; instead a mapping to None will cause characters
2447 to be deleted. */
2448 if (delobj != NULL) {
2449 PyErr_SetString(PyExc_TypeError,
2450 "deletions are implemented differently for unicode");
2451 return NULL;
2452 }
2453 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2454 }
2455#endif
2456 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2457 return NULL;
2458
2459 if (tablen != 256) {
2460 PyErr_SetString(PyExc_ValueError,
2461 "translation table must be 256 characters long");
2462 return NULL;
2463 }
2464
2465 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002466 if (PyString_Check(delobj)) {
2467 del_table = PyString_AS_STRING(delobj);
2468 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002469 }
2470#ifdef Py_USING_UNICODE
2471 else if (PyUnicode_Check(delobj)) {
2472 PyErr_SetString(PyExc_TypeError,
2473 "deletions are implemented differently for unicode");
2474 return NULL;
2475 }
2476#endif
2477 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2478 return NULL;
2479 }
2480 else {
2481 del_table = NULL;
2482 dellen = 0;
2483 }
2484
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002485 inlen = PyString_GET_SIZE(input_obj);
2486 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002487 if (result == NULL)
2488 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002489 output_start = output = PyString_AsString(result);
2490 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002491
2492 if (dellen == 0 && table != NULL) {
2493 /* If no deletions are required, use faster code */
2494 for (i = inlen; --i >= 0; ) {
2495 c = Py_CHARMASK(*input++);
2496 if (Py_CHARMASK((*output++ = table[c])) != c)
2497 changed = 1;
2498 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002499 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002500 return result;
2501 Py_DECREF(result);
2502 Py_INCREF(input_obj);
2503 return input_obj;
2504 }
2505
2506 if (table == NULL) {
2507 for (i = 0; i < 256; i++)
2508 trans_table[i] = Py_CHARMASK(i);
2509 } else {
2510 for (i = 0; i < 256; i++)
2511 trans_table[i] = Py_CHARMASK(table[i]);
2512 }
2513
2514 for (i = 0; i < dellen; i++)
2515 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2516
2517 for (i = inlen; --i >= 0; ) {
2518 c = Py_CHARMASK(*input++);
2519 if (trans_table[c] != -1)
2520 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2521 continue;
2522 changed = 1;
2523 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002524 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002525 Py_DECREF(result);
2526 Py_INCREF(input_obj);
2527 return input_obj;
2528 }
2529 /* Fix the size of the resulting string */
2530 if (inlen > 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002531 _PyString_Resize(&result, output - output_start);
Christian Heimes44720832008-05-26 13:01:01 +00002532 return result;
2533}
2534
2535
/* Search directions. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte `c' within the first
   `target_len' bytes of `target', or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2543
2544/* String ops must return a string. */
2545/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002546Py_LOCAL(PyStringObject *)
2547return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002548{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002549 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002550 Py_INCREF(self);
2551 return self;
2552 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002553 return (PyStringObject *)PyString_FromStringAndSize(
2554 PyString_AS_STRING(self),
2555 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002556}
2557
2558Py_LOCAL_INLINE(Py_ssize_t)
2559countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2560{
2561 Py_ssize_t count=0;
2562 const char *start=target;
2563 const char *end=target+target_len;
2564
2565 while ( (start=findchar(start, end-start, c)) != NULL ) {
2566 count++;
2567 if (count >= maxcount)
2568 break;
2569 start += 1;
2570 }
2571 return count;
2572}
2573
2574Py_LOCAL(Py_ssize_t)
2575findstring(const char *target, Py_ssize_t target_len,
2576 const char *pattern, Py_ssize_t pattern_len,
2577 Py_ssize_t start,
2578 Py_ssize_t end,
2579 int direction)
2580{
2581 if (start < 0) {
2582 start += target_len;
2583 if (start < 0)
2584 start = 0;
2585 }
2586 if (end > target_len) {
2587 end = target_len;
2588 } else if (end < 0) {
2589 end += target_len;
2590 if (end < 0)
2591 end = 0;
2592 }
2593
2594 /* zero-length substrings always match at the first attempt */
2595 if (pattern_len == 0)
2596 return (direction > 0) ? start : end;
2597
2598 end -= pattern_len;
2599
2600 if (direction < 0) {
2601 for (; end >= start; end--)
2602 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2603 return end;
2604 } else {
2605 for (; start <= end; start++)
2606 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2607 return start;
2608 }
2609 return -1;
2610}
2611
2612Py_LOCAL_INLINE(Py_ssize_t)
2613countstring(const char *target, Py_ssize_t target_len,
2614 const char *pattern, Py_ssize_t pattern_len,
2615 Py_ssize_t start,
2616 Py_ssize_t end,
2617 int direction, Py_ssize_t maxcount)
2618{
2619 Py_ssize_t count=0;
2620
2621 if (start < 0) {
2622 start += target_len;
2623 if (start < 0)
2624 start = 0;
2625 }
2626 if (end > target_len) {
2627 end = target_len;
2628 } else if (end < 0) {
2629 end += target_len;
2630 if (end < 0)
2631 end = 0;
2632 }
2633
2634 /* zero-length substrings match everywhere */
2635 if (pattern_len == 0 || maxcount == 0) {
2636 if (target_len+1 < maxcount)
2637 return target_len+1;
2638 return maxcount;
2639 }
2640
2641 end -= pattern_len;
2642 if (direction < 0) {
2643 for (; (end >= start); end--)
2644 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2645 count++;
2646 if (--maxcount <= 0) break;
2647 end -= pattern_len-1;
2648 }
2649 } else {
2650 for (; (start <= end); start++)
2651 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2652 count++;
2653 if (--maxcount <= 0)
2654 break;
2655 start += pattern_len-1;
2656 }
2657 }
2658 return count;
2659}
2660
2661
2662/* Algorithms for different cases of string replacement */
2663
2664/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002665Py_LOCAL(PyStringObject *)
2666replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002667 const char *to_s, Py_ssize_t to_len,
2668 Py_ssize_t maxcount)
2669{
2670 char *self_s, *result_s;
2671 Py_ssize_t self_len, result_len;
2672 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002673 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002674
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002675 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002676
2677 /* 1 at the end plus 1 after every character */
2678 count = self_len+1;
2679 if (maxcount < count)
2680 count = maxcount;
2681
2682 /* Check for overflow */
2683 /* result_len = count * to_len + self_len; */
2684 product = count * to_len;
2685 if (product / to_len != count) {
2686 PyErr_SetString(PyExc_OverflowError,
2687 "replace string is too long");
2688 return NULL;
2689 }
2690 result_len = product + self_len;
2691 if (result_len < 0) {
2692 PyErr_SetString(PyExc_OverflowError,
2693 "replace string is too long");
2694 return NULL;
2695 }
2696
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002697 if (! (result = (PyStringObject *)
2698 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002699 return NULL;
2700
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002701 self_s = PyString_AS_STRING(self);
2702 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002703
2704 /* TODO: special case single character, which doesn't need memcpy */
2705
2706 /* Lay the first one down (guaranteed this will occur) */
2707 Py_MEMCPY(result_s, to_s, to_len);
2708 result_s += to_len;
2709 count -= 1;
2710
2711 for (i=0; i<count; i++) {
2712 *result_s++ = *self_s++;
2713 Py_MEMCPY(result_s, to_s, to_len);
2714 result_s += to_len;
2715 }
2716
2717 /* Copy the rest of the original string */
2718 Py_MEMCPY(result_s, self_s, self_len-i);
2719
2720 return result;
2721}
2722
2723/* Special case for deleting a single character */
2724/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002725Py_LOCAL(PyStringObject *)
2726replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002727 char from_c, Py_ssize_t maxcount)
2728{
2729 char *self_s, *result_s;
2730 char *start, *next, *end;
2731 Py_ssize_t self_len, result_len;
2732 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002733 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002734
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002735 self_len = PyString_GET_SIZE(self);
2736 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002737
2738 count = countchar(self_s, self_len, from_c, maxcount);
2739 if (count == 0) {
2740 return return_self(self);
2741 }
2742
2743 result_len = self_len - count; /* from_len == 1 */
2744 assert(result_len>=0);
2745
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002746 if ( (result = (PyStringObject *)
2747 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002748 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002749 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002750
2751 start = self_s;
2752 end = self_s + self_len;
2753 while (count-- > 0) {
2754 next = findchar(start, end-start, from_c);
2755 if (next == NULL)
2756 break;
2757 Py_MEMCPY(result_s, start, next-start);
2758 result_s += (next-start);
2759 start = next+1;
2760 }
2761 Py_MEMCPY(result_s, start, end-start);
2762
2763 return result;
2764}
2765
2766/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2767
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002768Py_LOCAL(PyStringObject *)
2769replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002770 const char *from_s, Py_ssize_t from_len,
2771 Py_ssize_t maxcount) {
2772 char *self_s, *result_s;
2773 char *start, *next, *end;
2774 Py_ssize_t self_len, result_len;
2775 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002776 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002777
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002778 self_len = PyString_GET_SIZE(self);
2779 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002780
2781 count = countstring(self_s, self_len,
2782 from_s, from_len,
2783 0, self_len, 1,
2784 maxcount);
2785
2786 if (count == 0) {
2787 /* no matches */
2788 return return_self(self);
2789 }
2790
2791 result_len = self_len - (count * from_len);
2792 assert (result_len>=0);
2793
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002794 if ( (result = (PyStringObject *)
2795 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002796 return NULL;
2797
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002798 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002799
2800 start = self_s;
2801 end = self_s + self_len;
2802 while (count-- > 0) {
2803 offset = findstring(start, end-start,
2804 from_s, from_len,
2805 0, end-start, FORWARD);
2806 if (offset == -1)
2807 break;
2808 next = start + offset;
2809
2810 Py_MEMCPY(result_s, start, next-start);
2811
2812 result_s += (next-start);
2813 start = next+from_len;
2814 }
2815 Py_MEMCPY(result_s, start, end-start);
2816 return result;
2817}
2818
2819/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002820Py_LOCAL(PyStringObject *)
2821replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002822 char from_c, char to_c,
2823 Py_ssize_t maxcount)
2824{
2825 char *self_s, *result_s, *start, *end, *next;
2826 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002827 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002828
2829 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002830 self_s = PyString_AS_STRING(self);
2831 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002832
2833 next = findchar(self_s, self_len, from_c);
2834
2835 if (next == NULL) {
2836 /* No matches; return the original string */
2837 return return_self(self);
2838 }
2839
2840 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002841 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002842 if (result == NULL)
2843 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002844 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002845 Py_MEMCPY(result_s, self_s, self_len);
2846
2847 /* change everything in-place, starting with this one */
2848 start = result_s + (next-self_s);
2849 *start = to_c;
2850 start++;
2851 end = result_s + self_len;
2852
2853 while (--maxcount > 0) {
2854 next = findchar(start, end-start, from_c);
2855 if (next == NULL)
2856 break;
2857 *next = to_c;
2858 start = next+1;
2859 }
2860
2861 return result;
2862}
2863
2864/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002865Py_LOCAL(PyStringObject *)
2866replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002867 const char *from_s, Py_ssize_t from_len,
2868 const char *to_s, Py_ssize_t to_len,
2869 Py_ssize_t maxcount)
2870{
2871 char *result_s, *start, *end;
2872 char *self_s;
2873 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002874 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002875
2876 /* The result string will be the same size */
2877
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002878 self_s = PyString_AS_STRING(self);
2879 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002880
2881 offset = findstring(self_s, self_len,
2882 from_s, from_len,
2883 0, self_len, FORWARD);
2884 if (offset == -1) {
2885 /* No matches; return the original string */
2886 return return_self(self);
2887 }
2888
2889 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002890 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002891 if (result == NULL)
2892 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002893 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002894 Py_MEMCPY(result_s, self_s, self_len);
2895
2896 /* change everything in-place, starting with this one */
2897 start = result_s + offset;
2898 Py_MEMCPY(start, to_s, from_len);
2899 start += from_len;
2900 end = result_s + self_len;
2901
2902 while ( --maxcount > 0) {
2903 offset = findstring(start, end-start,
2904 from_s, from_len,
2905 0, end-start, FORWARD);
2906 if (offset==-1)
2907 break;
2908 Py_MEMCPY(start+offset, to_s, from_len);
2909 start += offset+from_len;
2910 }
2911
2912 return result;
2913}
2914
2915/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002916Py_LOCAL(PyStringObject *)
2917replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002918 char from_c,
2919 const char *to_s, Py_ssize_t to_len,
2920 Py_ssize_t maxcount)
2921{
2922 char *self_s, *result_s;
2923 char *start, *next, *end;
2924 Py_ssize_t self_len, result_len;
2925 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002926 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002927
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002928 self_s = PyString_AS_STRING(self);
2929 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002930
2931 count = countchar(self_s, self_len, from_c, maxcount);
2932 if (count == 0) {
2933 /* no matches, return unchanged */
2934 return return_self(self);
2935 }
2936
2937 /* use the difference between current and new, hence the "-1" */
2938 /* result_len = self_len + count * (to_len-1) */
2939 product = count * (to_len-1);
2940 if (product / (to_len-1) != count) {
2941 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2942 return NULL;
2943 }
2944 result_len = self_len + product;
2945 if (result_len < 0) {
2946 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2947 return NULL;
2948 }
2949
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002950 if ( (result = (PyStringObject *)
2951 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002952 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002953 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002954
2955 start = self_s;
2956 end = self_s + self_len;
2957 while (count-- > 0) {
2958 next = findchar(start, end-start, from_c);
2959 if (next == NULL)
2960 break;
2961
2962 if (next == start) {
2963 /* replace with the 'to' */
2964 Py_MEMCPY(result_s, to_s, to_len);
2965 result_s += to_len;
2966 start += 1;
2967 } else {
2968 /* copy the unchanged old then the 'to' */
2969 Py_MEMCPY(result_s, start, next-start);
2970 result_s += (next-start);
2971 Py_MEMCPY(result_s, to_s, to_len);
2972 result_s += to_len;
2973 start = next+1;
2974 }
2975 }
2976 /* Copy the remainder of the remaining string */
2977 Py_MEMCPY(result_s, start, end-start);
2978
2979 return result;
2980}
2981
2982/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002983Py_LOCAL(PyStringObject *)
2984replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002985 const char *from_s, Py_ssize_t from_len,
2986 const char *to_s, Py_ssize_t to_len,
2987 Py_ssize_t maxcount) {
2988 char *self_s, *result_s;
2989 char *start, *next, *end;
2990 Py_ssize_t self_len, result_len;
2991 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002992 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002993
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002994 self_s = PyString_AS_STRING(self);
2995 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002996
2997 count = countstring(self_s, self_len,
2998 from_s, from_len,
2999 0, self_len, FORWARD, maxcount);
3000 if (count == 0) {
3001 /* no matches, return unchanged */
3002 return return_self(self);
3003 }
3004
3005 /* Check for overflow */
3006 /* result_len = self_len + count * (to_len-from_len) */
3007 product = count * (to_len-from_len);
3008 if (product / (to_len-from_len) != count) {
3009 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3010 return NULL;
3011 }
3012 result_len = self_len + product;
3013 if (result_len < 0) {
3014 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3015 return NULL;
3016 }
3017
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003018 if ( (result = (PyStringObject *)
3019 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003020 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003021 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003022
3023 start = self_s;
3024 end = self_s + self_len;
3025 while (count-- > 0) {
3026 offset = findstring(start, end-start,
3027 from_s, from_len,
3028 0, end-start, FORWARD);
3029 if (offset == -1)
3030 break;
3031 next = start+offset;
3032 if (next == start) {
3033 /* replace with the 'to' */
3034 Py_MEMCPY(result_s, to_s, to_len);
3035 result_s += to_len;
3036 start += from_len;
3037 } else {
3038 /* copy the unchanged old then the 'to' */
3039 Py_MEMCPY(result_s, start, next-start);
3040 result_s += (next-start);
3041 Py_MEMCPY(result_s, to_s, to_len);
3042 result_s += to_len;
3043 start = next+from_len;
3044 }
3045 }
3046 /* Copy the remainder of the remaining string */
3047 Py_MEMCPY(result_s, start, end-start);
3048
3049 return result;
3050}
3051
3052
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003053Py_LOCAL(PyStringObject *)
3054replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003055 const char *from_s, Py_ssize_t from_len,
3056 const char *to_s, Py_ssize_t to_len,
3057 Py_ssize_t maxcount)
3058{
3059 if (maxcount < 0) {
3060 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003061 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003062 /* nothing to do; return the original string */
3063 return return_self(self);
3064 }
3065
3066 if (maxcount == 0 ||
3067 (from_len == 0 && to_len == 0)) {
3068 /* nothing to do; return the original string */
3069 return return_self(self);
3070 }
3071
3072 /* Handle zero-length special cases */
3073
3074 if (from_len == 0) {
3075 /* insert the 'to' string everywhere. */
3076 /* >>> "Python".replace("", ".") */
3077 /* '.P.y.t.h.o.n.' */
3078 return replace_interleave(self, to_s, to_len, maxcount);
3079 }
3080
3081 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3082 /* point for an empty self string to generate a non-empty string */
3083 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003084 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003085 return return_self(self);
3086 }
3087
3088 if (to_len == 0) {
3089 /* delete all occurances of 'from' string */
3090 if (from_len == 1) {
3091 return replace_delete_single_character(
3092 self, from_s[0], maxcount);
3093 } else {
3094 return replace_delete_substring(self, from_s, from_len, maxcount);
3095 }
3096 }
3097
3098 /* Handle special case where both strings have the same length */
3099
3100 if (from_len == to_len) {
3101 if (from_len == 1) {
3102 return replace_single_character_in_place(
3103 self,
3104 from_s[0],
3105 to_s[0],
3106 maxcount);
3107 } else {
3108 return replace_substring_in_place(
3109 self, from_s, from_len, to_s, to_len, maxcount);
3110 }
3111 }
3112
3113 /* Otherwise use the more generic algorithms */
3114 if (from_len == 1) {
3115 return replace_single_character(self, from_s[0],
3116 to_s, to_len, maxcount);
3117 } else {
3118 /* len('from')>=2, len('to')>=1 */
3119 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3120 }
3121}
3122
3123PyDoc_STRVAR(replace__doc__,
3124"S.replace (old, new[, count]) -> string\n\
3125\n\
3126Return a copy of string S with all occurrences of substring\n\
3127old replaced by new. If the optional argument count is\n\
3128given, only the first count occurrences are replaced.");
3129
3130static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003131string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003132{
3133 Py_ssize_t count = -1;
3134 PyObject *from, *to;
3135 const char *from_s, *to_s;
3136 Py_ssize_t from_len, to_len;
3137
3138 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3139 return NULL;
3140
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003141 if (PyString_Check(from)) {
3142 from_s = PyString_AS_STRING(from);
3143 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00003144 }
3145#ifdef Py_USING_UNICODE
3146 if (PyUnicode_Check(from))
3147 return PyUnicode_Replace((PyObject *)self,
3148 from, to, count);
3149#endif
3150 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3151 return NULL;
3152
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003153 if (PyString_Check(to)) {
3154 to_s = PyString_AS_STRING(to);
3155 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00003156 }
3157#ifdef Py_USING_UNICODE
3158 else if (PyUnicode_Check(to))
3159 return PyUnicode_Replace((PyObject *)self,
3160 from, to, count);
3161#endif
3162 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3163 return NULL;
3164
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003165 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00003166 from_s, from_len,
3167 to_s, to_len, count);
3168}
3169
3170/** End DALKE **/
3171
3172/* Matches the end (direction >= 0) or start (direction < 0) of self
3173 * against substr, using the start and end arguments. Returns
3174 * -1 on error, 0 if not found and 1 if found.
3175 */
3176Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003177_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00003178 Py_ssize_t end, int direction)
3179{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003180 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003181 Py_ssize_t slen;
3182 const char* sub;
3183 const char* str;
3184
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003185 if (PyString_Check(substr)) {
3186 sub = PyString_AS_STRING(substr);
3187 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00003188 }
3189#ifdef Py_USING_UNICODE
3190 else if (PyUnicode_Check(substr))
3191 return PyUnicode_Tailmatch((PyObject *)self,
3192 substr, start, end, direction);
3193#endif
3194 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3195 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003196 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003197
3198 string_adjust_indices(&start, &end, len);
3199
3200 if (direction < 0) {
3201 /* startswith */
3202 if (start+slen > len)
3203 return 0;
3204 } else {
3205 /* endswith */
3206 if (end-start < slen || start > len)
3207 return 0;
3208
3209 if (end-slen > start)
3210 start = end - slen;
3211 }
3212 if (end-start >= slen)
3213 return ! memcmp(str+start, sub, slen);
3214 return 0;
3215}
3216
3217
3218PyDoc_STRVAR(startswith__doc__,
3219"S.startswith(prefix[, start[, end]]) -> bool\n\
3220\n\
3221Return True if S starts with the specified prefix, False otherwise.\n\
3222With optional start, test S beginning at that position.\n\
3223With optional end, stop comparing S at that position.\n\
3224prefix can also be a tuple of strings to try.");
3225
3226static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003227string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003228{
3229 Py_ssize_t start = 0;
3230 Py_ssize_t end = PY_SSIZE_T_MAX;
3231 PyObject *subobj;
3232 int result;
3233
3234 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3235 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3236 return NULL;
3237 if (PyTuple_Check(subobj)) {
3238 Py_ssize_t i;
3239 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3240 result = _string_tailmatch(self,
3241 PyTuple_GET_ITEM(subobj, i),
3242 start, end, -1);
3243 if (result == -1)
3244 return NULL;
3245 else if (result) {
3246 Py_RETURN_TRUE;
3247 }
3248 }
3249 Py_RETURN_FALSE;
3250 }
3251 result = _string_tailmatch(self, subobj, start, end, -1);
3252 if (result == -1)
3253 return NULL;
3254 else
3255 return PyBool_FromLong(result);
3256}
3257
3258
3259PyDoc_STRVAR(endswith__doc__,
3260"S.endswith(suffix[, start[, end]]) -> bool\n\
3261\n\
3262Return True if S ends with the specified suffix, False otherwise.\n\
3263With optional start, test S beginning at that position.\n\
3264With optional end, stop comparing S at that position.\n\
3265suffix can also be a tuple of strings to try.");
3266
3267static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003268string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003269{
3270 Py_ssize_t start = 0;
3271 Py_ssize_t end = PY_SSIZE_T_MAX;
3272 PyObject *subobj;
3273 int result;
3274
3275 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3276 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3277 return NULL;
3278 if (PyTuple_Check(subobj)) {
3279 Py_ssize_t i;
3280 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3281 result = _string_tailmatch(self,
3282 PyTuple_GET_ITEM(subobj, i),
3283 start, end, +1);
3284 if (result == -1)
3285 return NULL;
3286 else if (result) {
3287 Py_RETURN_TRUE;
3288 }
3289 }
3290 Py_RETURN_FALSE;
3291 }
3292 result = _string_tailmatch(self, subobj, start, end, +1);
3293 if (result == -1)
3294 return NULL;
3295 else
3296 return PyBool_FromLong(result);
3297}
3298
3299
3300PyDoc_STRVAR(encode__doc__,
3301"S.encode([encoding[,errors]]) -> object\n\
3302\n\
3303Encodes S using the codec registered for encoding. encoding defaults\n\
3304to the default encoding. errors may be given to set a different error\n\
3305handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3306a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3307'xmlcharrefreplace' as well as any other name registered with\n\
3308codecs.register_error that is able to handle UnicodeEncodeErrors.");
3309
3310static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003311string_encode(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003312{
3313 char *encoding = NULL;
3314 char *errors = NULL;
3315 PyObject *v;
3316
3317 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3318 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003319 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003320 if (v == NULL)
3321 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003322 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003323 PyErr_Format(PyExc_TypeError,
3324 "encoder did not return a string/unicode object "
3325 "(type=%.400s)",
3326 Py_TYPE(v)->tp_name);
3327 Py_DECREF(v);
3328 return NULL;
3329 }
3330 return v;
3331
3332 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003333 return NULL;
3334}
3335
Christian Heimes44720832008-05-26 13:01:01 +00003336
3337PyDoc_STRVAR(decode__doc__,
3338"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003339\n\
Christian Heimes44720832008-05-26 13:01:01 +00003340Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003341to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003342handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3343a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3344as well as any other name registerd with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003345able to handle UnicodeDecodeErrors.");
3346
3347static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003348string_decode(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003349{
Christian Heimes44720832008-05-26 13:01:01 +00003350 char *encoding = NULL;
3351 char *errors = NULL;
3352 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003353
3354 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3355 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003356 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003357 if (v == NULL)
3358 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003359 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003360 PyErr_Format(PyExc_TypeError,
3361 "decoder did not return a string/unicode object "
3362 "(type=%.400s)",
3363 Py_TYPE(v)->tp_name);
3364 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003365 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003366 }
3367 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003368
Christian Heimes44720832008-05-26 13:01:01 +00003369 onError:
3370 return NULL;
3371}
3372
3373
3374PyDoc_STRVAR(expandtabs__doc__,
3375"S.expandtabs([tabsize]) -> string\n\
3376\n\
3377Return a copy of S where all tab characters are expanded using spaces.\n\
3378If tabsize is not given, a tab size of 8 characters is assumed.");
3379
3380static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003381string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003382{
3383 const char *e, *p, *qe;
3384 char *q;
3385 Py_ssize_t i, j, incr;
3386 PyObject *u;
3387 int tabsize = 8;
3388
3389 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3390 return NULL;
3391
3392 /* First pass: determine size of output string */
3393 i = 0; /* chars up to and including most recent \n or \r */
3394 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003395 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3396 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003397 if (*p == '\t') {
3398 if (tabsize > 0) {
3399 incr = tabsize - (j % tabsize);
3400 if (j > PY_SSIZE_T_MAX - incr)
3401 goto overflow1;
3402 j += incr;
3403 }
3404 }
3405 else {
3406 if (j > PY_SSIZE_T_MAX - 1)
3407 goto overflow1;
3408 j++;
3409 if (*p == '\n' || *p == '\r') {
3410 if (i > PY_SSIZE_T_MAX - j)
3411 goto overflow1;
3412 i += j;
3413 j = 0;
3414 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003415 }
Christian Heimes44720832008-05-26 13:01:01 +00003416
3417 if (i > PY_SSIZE_T_MAX - j)
3418 goto overflow1;
3419
3420 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003421 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003422 if (!u)
3423 return NULL;
3424
3425 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003426 q = PyString_AS_STRING(u); /* next output char */
3427 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003428
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003429 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003430 if (*p == '\t') {
3431 if (tabsize > 0) {
3432 i = tabsize - (j % tabsize);
3433 j += i;
3434 while (i--) {
3435 if (q >= qe)
3436 goto overflow2;
3437 *q++ = ' ';
3438 }
3439 }
3440 }
3441 else {
3442 if (q >= qe)
3443 goto overflow2;
3444 *q++ = *p;
3445 j++;
3446 if (*p == '\n' || *p == '\r')
3447 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003448 }
Christian Heimes44720832008-05-26 13:01:01 +00003449
3450 return u;
3451
3452 overflow2:
3453 Py_DECREF(u);
3454 overflow1:
3455 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3456 return NULL;
3457}
3458
3459Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003460pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003461{
3462 PyObject *u;
3463
3464 if (left < 0)
3465 left = 0;
3466 if (right < 0)
3467 right = 0;
3468
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003469 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003470 Py_INCREF(self);
3471 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003472 }
3473
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003474 u = PyString_FromStringAndSize(NULL,
3475 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003476 if (u) {
3477 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003478 memset(PyString_AS_STRING(u), fill, left);
3479 Py_MEMCPY(PyString_AS_STRING(u) + left,
3480 PyString_AS_STRING(self),
3481 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003482 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003483 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003484 fill, right);
3485 }
3486
3487 return u;
3488}
3489
3490PyDoc_STRVAR(ljust__doc__,
3491"S.ljust(width[, fillchar]) -> string\n"
3492"\n"
3493"Return S left justified in a string of length width. Padding is\n"
3494"done using the specified fill character (default is a space).");
3495
3496static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003497string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003498{
3499 Py_ssize_t width;
3500 char fillchar = ' ';
3501
3502 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3503 return NULL;
3504
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003505 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003506 Py_INCREF(self);
3507 return (PyObject*) self;
3508 }
3509
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003510 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003511}
3512
3513
3514PyDoc_STRVAR(rjust__doc__,
3515"S.rjust(width[, fillchar]) -> string\n"
3516"\n"
3517"Return S right justified in a string of length width. Padding is\n"
3518"done using the specified fill character (default is a space)");
3519
3520static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003521string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003522{
3523 Py_ssize_t width;
3524 char fillchar = ' ';
3525
3526 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3527 return NULL;
3528
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003529 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003530 Py_INCREF(self);
3531 return (PyObject*) self;
3532 }
3533
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003534 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003535}
3536
3537
3538PyDoc_STRVAR(center__doc__,
3539"S.center(width[, fillchar]) -> string\n"
3540"\n"
3541"Return S centered in a string of length width. Padding is\n"
3542"done using the specified fill character (default is a space)");
3543
3544static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003545string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003546{
3547 Py_ssize_t marg, left;
3548 Py_ssize_t width;
3549 char fillchar = ' ';
3550
3551 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3552 return NULL;
3553
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003554 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003555 Py_INCREF(self);
3556 return (PyObject*) self;
3557 }
3558
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003559 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003560 left = marg / 2 + (marg & width & 1);
3561
3562 return pad(self, left, marg - left, fillchar);
3563}
3564
3565PyDoc_STRVAR(zfill__doc__,
3566"S.zfill(width) -> string\n"
3567"\n"
3568"Pad a numeric string S with zeros on the left, to fill a field\n"
3569"of the specified width. The string S is never truncated.");
3570
3571static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003572string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003573{
3574 Py_ssize_t fill;
3575 PyObject *s;
3576 char *p;
3577 Py_ssize_t width;
3578
3579 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3580 return NULL;
3581
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003582 if (PyString_GET_SIZE(self) >= width) {
3583 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003584 Py_INCREF(self);
3585 return (PyObject*) self;
3586 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003587 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003588 return PyString_FromStringAndSize(
3589 PyString_AS_STRING(self),
3590 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003591 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003592 }
3593
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003594 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003595
Christian Heimes44720832008-05-26 13:01:01 +00003596 s = pad(self, fill, 0, '0');
3597
3598 if (s == NULL)
3599 return NULL;
3600
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003601 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003602 if (p[fill] == '+' || p[fill] == '-') {
3603 /* move sign to beginning of string */
3604 p[0] = p[fill];
3605 p[fill] = '0';
3606 }
3607
3608 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003609}
3610
Christian Heimes44720832008-05-26 13:01:01 +00003611PyDoc_STRVAR(isspace__doc__,
3612"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003613\n\
Christian Heimes44720832008-05-26 13:01:01 +00003614Return True if all characters in S are whitespace\n\
3615and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003616
Christian Heimes44720832008-05-26 13:01:01 +00003617static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003618string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003619{
Christian Heimes44720832008-05-26 13:01:01 +00003620 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003621 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003622 register const unsigned char *e;
3623
3624 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003625 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003626 isspace(*p))
3627 return PyBool_FromLong(1);
3628
3629 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003630 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003631 return PyBool_FromLong(0);
3632
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003633 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003634 for (; p < e; p++) {
3635 if (!isspace(*p))
3636 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003637 }
Christian Heimes44720832008-05-26 13:01:01 +00003638 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003639}
3640
Christian Heimes44720832008-05-26 13:01:01 +00003641
3642PyDoc_STRVAR(isalpha__doc__,
3643"S.isalpha() -> bool\n\
3644\n\
3645Return True if all characters in S are alphabetic\n\
3646and there is at least one character in S, False otherwise.");
3647
3648static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003649string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003650{
Christian Heimes44720832008-05-26 13:01:01 +00003651 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003652 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003653 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003654
Christian Heimes44720832008-05-26 13:01:01 +00003655 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003656 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003657 isalpha(*p))
3658 return PyBool_FromLong(1);
3659
3660 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003661 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003662 return PyBool_FromLong(0);
3663
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003664 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003665 for (; p < e; p++) {
3666 if (!isalpha(*p))
3667 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003668 }
Christian Heimes44720832008-05-26 13:01:01 +00003669 return PyBool_FromLong(1);
3670}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003671
Christian Heimes44720832008-05-26 13:01:01 +00003672
3673PyDoc_STRVAR(isalnum__doc__,
3674"S.isalnum() -> bool\n\
3675\n\
3676Return True if all characters in S are alphanumeric\n\
3677and there is at least one character in S, False otherwise.");
3678
3679static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003680string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003681{
3682 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003683 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003684 register const unsigned char *e;
3685
3686 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003687 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003688 isalnum(*p))
3689 return PyBool_FromLong(1);
3690
3691 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003692 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003693 return PyBool_FromLong(0);
3694
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003695 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003696 for (; p < e; p++) {
3697 if (!isalnum(*p))
3698 return PyBool_FromLong(0);
3699 }
3700 return PyBool_FromLong(1);
3701}
3702
3703
3704PyDoc_STRVAR(isdigit__doc__,
3705"S.isdigit() -> bool\n\
3706\n\
3707Return True if all characters in S are digits\n\
3708and there is at least one character in S, False otherwise.");
3709
3710static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003711string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003712{
3713 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003714 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003715 register const unsigned char *e;
3716
3717 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003718 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003719 isdigit(*p))
3720 return PyBool_FromLong(1);
3721
3722 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003723 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003724 return PyBool_FromLong(0);
3725
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003726 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003727 for (; p < e; p++) {
3728 if (!isdigit(*p))
3729 return PyBool_FromLong(0);
3730 }
3731 return PyBool_FromLong(1);
3732}
3733
3734
3735PyDoc_STRVAR(islower__doc__,
3736"S.islower() -> bool\n\
3737\n\
3738Return True if all cased characters in S are lowercase and there is\n\
3739at least one cased character in S, False otherwise.");
3740
3741static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003742string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003743{
3744 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003745 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003746 register const unsigned char *e;
3747 int cased;
3748
3749 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003750 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003751 return PyBool_FromLong(islower(*p) != 0);
3752
3753 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003754 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003755 return PyBool_FromLong(0);
3756
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003757 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003758 cased = 0;
3759 for (; p < e; p++) {
3760 if (isupper(*p))
3761 return PyBool_FromLong(0);
3762 else if (!cased && islower(*p))
3763 cased = 1;
3764 }
3765 return PyBool_FromLong(cased);
3766}
3767
3768
3769PyDoc_STRVAR(isupper__doc__,
3770"S.isupper() -> bool\n\
3771\n\
3772Return True if all cased characters in S are uppercase and there is\n\
3773at least one cased character in S, False otherwise.");
3774
3775static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003776string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003777{
3778 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003779 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003780 register const unsigned char *e;
3781 int cased;
3782
3783 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003784 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003785 return PyBool_FromLong(isupper(*p) != 0);
3786
3787 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003788 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003789 return PyBool_FromLong(0);
3790
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003791 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003792 cased = 0;
3793 for (; p < e; p++) {
3794 if (islower(*p))
3795 return PyBool_FromLong(0);
3796 else if (!cased && isupper(*p))
3797 cased = 1;
3798 }
3799 return PyBool_FromLong(cased);
3800}
3801
3802
3803PyDoc_STRVAR(istitle__doc__,
3804"S.istitle() -> bool\n\
3805\n\
3806Return True if S is a titlecased string and there is at least one\n\
3807character in S, i.e. uppercase characters may only follow uncased\n\
3808characters and lowercase characters only cased ones. Return False\n\
3809otherwise.");
3810
3811static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003812string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003813{
3814 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003815 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003816 register const unsigned char *e;
3817 int cased, previous_is_cased;
3818
3819 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003820 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003821 return PyBool_FromLong(isupper(*p) != 0);
3822
3823 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003824 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003825 return PyBool_FromLong(0);
3826
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003827 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003828 cased = 0;
3829 previous_is_cased = 0;
3830 for (; p < e; p++) {
3831 register const unsigned char ch = *p;
3832
3833 if (isupper(ch)) {
3834 if (previous_is_cased)
3835 return PyBool_FromLong(0);
3836 previous_is_cased = 1;
3837 cased = 1;
3838 }
3839 else if (islower(ch)) {
3840 if (!previous_is_cased)
3841 return PyBool_FromLong(0);
3842 previous_is_cased = 1;
3843 cased = 1;
3844 }
3845 else
3846 previous_is_cased = 0;
3847 }
3848 return PyBool_FromLong(cased);
3849}
3850
3851
3852PyDoc_STRVAR(splitlines__doc__,
3853"S.splitlines([keepends]) -> list of strings\n\
3854\n\
3855Return a list of the lines in S, breaking at line boundaries.\n\
3856Line breaks are not included in the resulting list unless keepends\n\
3857is given and true.");
3858
3859static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003860string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003861{
3862 register Py_ssize_t i;
3863 register Py_ssize_t j;
3864 Py_ssize_t len;
3865 int keepends = 0;
3866 PyObject *list;
3867 PyObject *str;
3868 char *data;
3869
3870 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3871 return NULL;
3872
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003873 data = PyString_AS_STRING(self);
3874 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003875
3876 /* This does not use the preallocated list because splitlines is
3877 usually run with hundreds of newlines. The overhead of
3878 switching between PyList_SET_ITEM and append causes about a
3879 2-3% slowdown for that common case. A smarter implementation
3880 could move the if check out, so the SET_ITEMs are done first
3881 and the appends only done when the prealloc buffer is full.
3882 That's too much work for little gain.*/
3883
3884 list = PyList_New(0);
3885 if (!list)
3886 goto onError;
3887
3888 for (i = j = 0; i < len; ) {
3889 Py_ssize_t eol;
3890
3891 /* Find a line and append it */
3892 while (i < len && data[i] != '\n' && data[i] != '\r')
3893 i++;
3894
3895 /* Skip the line break reading CRLF as one line break */
3896 eol = i;
3897 if (i < len) {
3898 if (data[i] == '\r' && i + 1 < len &&
3899 data[i+1] == '\n')
3900 i += 2;
3901 else
3902 i++;
3903 if (keepends)
3904 eol = i;
3905 }
3906 SPLIT_APPEND(data, j, eol);
3907 j = i;
3908 }
3909 if (j < len) {
3910 SPLIT_APPEND(data, j, len);
3911 }
3912
3913 return list;
3914
3915 onError:
3916 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003917 return NULL;
3918}
3919
Robert Schuppenies51df0642008-06-01 16:16:17 +00003920PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003921"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003922
3923static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003924string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003925{
3926 Py_ssize_t res;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003927 res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00003928 return PyInt_FromSsize_t(res);
3929}
3930
Christian Heimes44720832008-05-26 13:01:01 +00003931#undef SPLIT_APPEND
3932#undef SPLIT_ADD
3933#undef MAX_PREALLOC
3934#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003935
3936static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003937string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003938{
Christian Heimes44720832008-05-26 13:01:01 +00003939 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003940}
3941
Christian Heimes1a6387e2008-03-26 12:49:49 +00003942
Christian Heimes44720832008-05-26 13:01:01 +00003943#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003944
Christian Heimes44720832008-05-26 13:01:01 +00003945PyDoc_STRVAR(format__doc__,
3946"S.format(*args, **kwargs) -> unicode\n\
3947\n\
3948");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003949
Eric Smithdc13b792008-05-30 18:10:04 +00003950static PyObject *
3951string__format__(PyObject* self, PyObject* args)
3952{
3953 PyObject *format_spec;
3954 PyObject *result = NULL;
3955 PyObject *tmp = NULL;
3956
3957 /* If 2.x, convert format_spec to the same type as value */
3958 /* This is to allow things like u''.format('') */
3959 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3960 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003961 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00003962 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3963 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3964 goto done;
3965 }
3966 tmp = PyObject_Str(format_spec);
3967 if (tmp == NULL)
3968 goto done;
3969 format_spec = tmp;
3970
3971 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003972 PyString_AS_STRING(format_spec),
3973 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003974done:
3975 Py_XDECREF(tmp);
3976 return result;
3977}
3978
Christian Heimes44720832008-05-26 13:01:01 +00003979PyDoc_STRVAR(p_format__doc__,
3980"S.__format__(format_spec) -> unicode\n\
3981\n\
3982");
3983
3984
Christian Heimes1a6387e2008-03-26 12:49:49 +00003985static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003986string_methods[] = {
3987 /* Counterparts of the obsolete stropmodule functions; except
3988 string.maketrans(). */
3989 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3990 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3991 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3992 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3993 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3994 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3995 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3996 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3997 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3998 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3999 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4000 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4001 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4002 capitalize__doc__},
4003 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4004 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4005 endswith__doc__},
4006 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4007 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4008 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4009 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4010 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4011 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4012 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4013 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4014 {"rpartition", (PyCFunction)string_rpartition, METH_O,
4015 rpartition__doc__},
4016 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4017 startswith__doc__},
4018 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4019 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4020 swapcase__doc__},
4021 {"translate", (PyCFunction)string_translate, METH_VARARGS,
4022 translate__doc__},
4023 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4024 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4025 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4026 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4027 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4028 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4029 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4030 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4031 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4032 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4033 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4034 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4035 expandtabs__doc__},
4036 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4037 splitlines__doc__},
Robert Schuppenies51df0642008-06-01 16:16:17 +00004038 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4039 sizeof__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00004040 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4041 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00004042};
4043
4044static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004045str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004046
Christian Heimes44720832008-05-26 13:01:01 +00004047static PyObject *
4048string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4049{
4050 PyObject *x = NULL;
4051 static char *kwlist[] = {"object", 0};
4052
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004053 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00004054 return str_subtype_new(type, args, kwds);
4055 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4056 return NULL;
4057 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004058 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00004059 return PyObject_Str(x);
4060}
4061
4062static PyObject *
4063str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4064{
4065 PyObject *tmp, *pnew;
4066 Py_ssize_t n;
4067
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004068 assert(PyType_IsSubtype(type, &PyString_Type));
4069 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00004070 if (tmp == NULL)
4071 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004072 assert(PyString_CheckExact(tmp));
4073 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00004074 pnew = type->tp_alloc(type, n);
4075 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004076 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4077 ((PyStringObject *)pnew)->ob_shash =
4078 ((PyStringObject *)tmp)->ob_shash;
4079 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00004080 }
4081 Py_DECREF(tmp);
4082 return pnew;
4083}
4084
4085static PyObject *
4086basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4087{
4088 PyErr_SetString(PyExc_TypeError,
4089 "The basestring type cannot be instantiated");
4090 return NULL;
4091}
4092
4093static PyObject *
4094string_mod(PyObject *v, PyObject *w)
4095{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004096 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004097 Py_INCREF(Py_NotImplemented);
4098 return Py_NotImplemented;
4099 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004100 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004101}
4102
4103PyDoc_STRVAR(basestring_doc,
4104"Type basestring cannot be instantiated; it is the base for str and unicode.");
4105
4106static PyNumberMethods string_as_number = {
4107 0, /*nb_add*/
4108 0, /*nb_subtract*/
4109 0, /*nb_multiply*/
4110 0, /*nb_divide*/
4111 string_mod, /*nb_remainder*/
4112};
4113
4114
4115PyTypeObject PyBaseString_Type = {
4116 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4117 "basestring",
4118 0,
4119 0,
4120 0, /* tp_dealloc */
4121 0, /* tp_print */
4122 0, /* tp_getattr */
4123 0, /* tp_setattr */
4124 0, /* tp_compare */
4125 0, /* tp_repr */
4126 0, /* tp_as_number */
4127 0, /* tp_as_sequence */
4128 0, /* tp_as_mapping */
4129 0, /* tp_hash */
4130 0, /* tp_call */
4131 0, /* tp_str */
4132 0, /* tp_getattro */
4133 0, /* tp_setattro */
4134 0, /* tp_as_buffer */
4135 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4136 basestring_doc, /* tp_doc */
4137 0, /* tp_traverse */
4138 0, /* tp_clear */
4139 0, /* tp_richcompare */
4140 0, /* tp_weaklistoffset */
4141 0, /* tp_iter */
4142 0, /* tp_iternext */
4143 0, /* tp_methods */
4144 0, /* tp_members */
4145 0, /* tp_getset */
4146 &PyBaseObject_Type, /* tp_base */
4147 0, /* tp_dict */
4148 0, /* tp_descr_get */
4149 0, /* tp_descr_set */
4150 0, /* tp_dictoffset */
4151 0, /* tp_init */
4152 0, /* tp_alloc */
4153 basestring_new, /* tp_new */
4154 0, /* tp_free */
4155};
4156
4157PyDoc_STRVAR(string_doc,
4158"str(object) -> string\n\
4159\n\
4160Return a nice string representation of the object.\n\
4161If the argument is a string, the return value is the same object.");
4162
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004163PyTypeObject PyString_Type = {
Christian Heimes44720832008-05-26 13:01:01 +00004164 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4165 "str",
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004166 sizeof(PyStringObject),
Christian Heimes44720832008-05-26 13:01:01 +00004167 sizeof(char),
4168 string_dealloc, /* tp_dealloc */
4169 (printfunc)string_print, /* tp_print */
4170 0, /* tp_getattr */
4171 0, /* tp_setattr */
4172 0, /* tp_compare */
4173 string_repr, /* tp_repr */
4174 &string_as_number, /* tp_as_number */
4175 &string_as_sequence, /* tp_as_sequence */
4176 &string_as_mapping, /* tp_as_mapping */
4177 (hashfunc)string_hash, /* tp_hash */
4178 0, /* tp_call */
4179 string_str, /* tp_str */
4180 PyObject_GenericGetAttr, /* tp_getattro */
4181 0, /* tp_setattro */
4182 &string_as_buffer, /* tp_as_buffer */
4183 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4184 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4185 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4186 string_doc, /* tp_doc */
4187 0, /* tp_traverse */
4188 0, /* tp_clear */
4189 (richcmpfunc)string_richcompare, /* tp_richcompare */
4190 0, /* tp_weaklistoffset */
4191 0, /* tp_iter */
4192 0, /* tp_iternext */
4193 string_methods, /* tp_methods */
4194 0, /* tp_members */
4195 0, /* tp_getset */
4196 &PyBaseString_Type, /* tp_base */
4197 0, /* tp_dict */
4198 0, /* tp_descr_get */
4199 0, /* tp_descr_set */
4200 0, /* tp_dictoffset */
4201 0, /* tp_init */
4202 0, /* tp_alloc */
4203 string_new, /* tp_new */
4204 PyObject_Del, /* tp_free */
4205};
4206
4207void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004208PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004209{
4210 register PyObject *v;
4211 if (*pv == NULL)
4212 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004213 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00004214 Py_DECREF(*pv);
4215 *pv = NULL;
4216 return;
4217 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004218 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004219 Py_DECREF(*pv);
4220 *pv = v;
4221}
4222
4223void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004224PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004225{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004226 PyString_Concat(pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004227 Py_XDECREF(w);
4228}
4229
4230
4231/* The following function breaks the notion that strings are immutable:
4232 it changes the size of a string. We get away with this only if there
4233 is only one module referencing the object. You can also think of it
4234 as creating a new string object and destroying the old one, only
4235 more efficiently. In any case, don't use this if the string may
4236 already be known to some other part of the code...
4237 Note that if there's not enough memory to resize the string, the original
4238 string object at *pv is deallocated, *pv is set to NULL, an "out of
4239 memory" exception is set, and -1 is returned. Else (on success) 0 is
4240 returned, and the value in *pv may or may not be the same as on input.
4241 As always, an extra byte is allocated for a trailing \0 byte (newsize
4242 does *not* include that), and a trailing \0 byte is stored.
4243*/
4244
4245int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004246_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00004247{
4248 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004249 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00004250 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004251 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4252 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004253 *pv = 0;
4254 Py_DECREF(v);
4255 PyErr_BadInternalCall();
4256 return -1;
4257 }
4258 /* XXX UNREF/NEWREF interface should be more symmetrical */
4259 _Py_DEC_REFTOTAL;
4260 _Py_ForgetReference(v);
4261 *pv = (PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004262 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00004263 if (*pv == NULL) {
4264 PyObject_Del(v);
4265 PyErr_NoMemory();
4266 return -1;
4267 }
4268 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004269 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00004270 Py_SIZE(sv) = newsize;
4271 sv->ob_sval[newsize] = '\0';
4272 sv->ob_shash = -1; /* invalidate cached hash value */
4273 return 0;
4274}
4275
4276/* Helpers for formatstring */
4277
4278Py_LOCAL_INLINE(PyObject *)
4279getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4280{
4281 Py_ssize_t argidx = *p_argidx;
4282 if (argidx < arglen) {
4283 (*p_argidx)++;
4284 if (arglen < 0)
4285 return args;
4286 else
4287 return PyTuple_GetItem(args, argidx);
4288 }
4289 PyErr_SetString(PyExc_TypeError,
4290 "not enough arguments for format string");
4291 return NULL;
4292}
4293
/* Bit flags recording which format modifiers were seen in a conversion
 * specification; each flag corresponds to one modifier character.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */

4306
4307Py_LOCAL_INLINE(int)
4308formatfloat(char *buf, size_t buflen, int flags,
4309 int prec, int type, PyObject *v)
4310{
4311 /* fmt = '%#.' + `prec` + `type`
4312 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4313 char fmt[20];
4314 double x;
4315 x = PyFloat_AsDouble(v);
4316 if (x == -1.0 && PyErr_Occurred()) {
4317 PyErr_Format(PyExc_TypeError, "float argument required, "
4318 "not %.200s", Py_TYPE(v)->tp_name);
4319 return -1;
4320 }
4321 if (prec < 0)
4322 prec = 6;
Eric Smith454816d2008-07-17 17:48:39 +00004323 if ((type == 'f' || type == 'F') && (fabs(x) / 1e25) >= 1e25) {
4324 if (type == 'f')
4325 type = 'g';
4326 else
4327 type = 'G';
4328 }
Christian Heimes44720832008-05-26 13:01:01 +00004329 /* Worst case length calc to ensure no buffer overrun:
4330
4331 'g' formats:
4332 fmt = %#.<prec>g
4333 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4334 for any double rep.)
4335 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4336
4337 'f' formats:
4338 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4339 len = 1 + 50 + 1 + prec = 52 + prec
4340
4341 If prec=0 the effective precision is 1 (the leading digit is
4342 always given), therefore increase the length by one.
4343
4344 */
4345 if (((type == 'g' || type == 'G') &&
4346 buflen <= (size_t)10 + (size_t)prec) ||
Eric Smith454816d2008-07-17 17:48:39 +00004347 ((type == 'f' || type == 'F') &&
4348 buflen <= (size_t)53 + (size_t)prec)) {
Christian Heimes44720832008-05-26 13:01:01 +00004349 PyErr_SetString(PyExc_OverflowError,
4350 "formatted float is too long (precision too large?)");
4351 return -1;
4352 }
4353 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4354 (flags&F_ALT) ? "#" : "",
4355 prec, type);
4356 PyOS_ascii_formatd(buf, buflen, fmt, x);
4357 return (int)strlen(buf);
4358}
4359
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004360/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004361 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4362 * Python's regular ints.
4363 * Return value: a new PyString*, or NULL if error.
4364 * . *pbuf is set to point into it,
4365 * *plen set to the # of chars following that.
4366 * Caller must decref it when done using pbuf.
4367 * The string starting at *pbuf is of the form
4368 * "-"? ("0x" | "0X")? digit+
4369 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4370 * set in flags. The case of hex digits will be correct,
4371 * There will be at least prec digits, zero-filled on the left if
4372 * necessary to get that many.
4373 * val object to be converted
4374 * flags bitmask of format flags; only F_ALT is looked at
4375 * prec minimum number of digits; 0-fill on left if needed
4376 * type a character in [duoxX]; u acts the same as d
4377 *
4378 * CAUTION: o, x and X conversions on regular ints can never
4379 * produce a '-' sign, but can for Python's unbounded ints.
4380 */
4381PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004382_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00004383 char **pbuf, int *plen)
4384{
4385 PyObject *result = NULL;
4386 char *buf;
4387 Py_ssize_t i;
4388 int sign; /* 1 if '-', else 0 */
4389 int len; /* number of characters */
4390 Py_ssize_t llen;
4391 int numdigits; /* len == numnondigits + numdigits */
4392 int numnondigits = 0;
4393
4394 switch (type) {
4395 case 'd':
4396 case 'u':
4397 result = Py_TYPE(val)->tp_str(val);
4398 break;
4399 case 'o':
4400 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4401 break;
4402 case 'x':
4403 case 'X':
4404 numnondigits = 2;
4405 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4406 break;
4407 default:
4408 assert(!"'type' not in [duoxX]");
4409 }
4410 if (!result)
4411 return NULL;
4412
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004413 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004414 if (!buf) {
4415 Py_DECREF(result);
4416 return NULL;
4417 }
4418
4419 /* To modify the string in-place, there can only be one reference. */
4420 if (Py_REFCNT(result) != 1) {
4421 PyErr_BadInternalCall();
4422 return NULL;
4423 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004424 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004425 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004426 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004427 return NULL;
4428 }
4429 len = (int)llen;
4430 if (buf[len-1] == 'L') {
4431 --len;
4432 buf[len] = '\0';
4433 }
4434 sign = buf[0] == '-';
4435 numnondigits += sign;
4436 numdigits = len - numnondigits;
4437 assert(numdigits > 0);
4438
4439 /* Get rid of base marker unless F_ALT */
4440 if ((flags & F_ALT) == 0) {
4441 /* Need to skip 0x, 0X or 0. */
4442 int skipped = 0;
4443 switch (type) {
4444 case 'o':
4445 assert(buf[sign] == '0');
4446 /* If 0 is only digit, leave it alone. */
4447 if (numdigits > 1) {
4448 skipped = 1;
4449 --numdigits;
4450 }
4451 break;
4452 case 'x':
4453 case 'X':
4454 assert(buf[sign] == '0');
4455 assert(buf[sign + 1] == 'x');
4456 skipped = 2;
4457 numnondigits -= 2;
4458 break;
4459 }
4460 if (skipped) {
4461 buf += skipped;
4462 len -= skipped;
4463 if (sign)
4464 buf[0] = '-';
4465 }
4466 assert(len == numnondigits + numdigits);
4467 assert(numdigits > 0);
4468 }
4469
4470 /* Fill with leading zeroes to meet minimum width. */
4471 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004472 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004473 numnondigits + prec);
4474 char *b1;
4475 if (!r1) {
4476 Py_DECREF(result);
4477 return NULL;
4478 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004479 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004480 for (i = 0; i < numnondigits; ++i)
4481 *b1++ = *buf++;
4482 for (i = 0; i < prec - numdigits; i++)
4483 *b1++ = '0';
4484 for (i = 0; i < numdigits; i++)
4485 *b1++ = *buf++;
4486 *b1 = '\0';
4487 Py_DECREF(result);
4488 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004489 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004490 len = numnondigits + prec;
4491 }
4492
4493 /* Fix up case for hex conversions. */
4494 if (type == 'X') {
4495 /* Need to convert all lower case letters to upper case.
4496 and need to convert 0x to 0X (and -0x to -0X). */
4497 for (i = 0; i < len; i++)
4498 if (buf[i] >= 'a' && buf[i] <= 'x')
4499 buf[i] -= 'a'-'A';
4500 }
4501 *pbuf = buf;
4502 *plen = len;
4503 return result;
4504}
4505
4506Py_LOCAL_INLINE(int)
4507formatint(char *buf, size_t buflen, int flags,
4508 int prec, int type, PyObject *v)
4509{
4510 /* fmt = '%#.' + `prec` + 'l' + `type`
4511 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4512 + 1 + 1 = 24 */
4513 char fmt[64]; /* plenty big enough! */
4514 char *sign;
4515 long x;
4516
4517 x = PyInt_AsLong(v);
4518 if (x == -1 && PyErr_Occurred()) {
4519 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4520 Py_TYPE(v)->tp_name);
4521 return -1;
4522 }
4523 if (x < 0 && type == 'u') {
4524 type = 'd';
4525 }
4526 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4527 sign = "-";
4528 else
4529 sign = "";
4530 if (prec < 0)
4531 prec = 1;
4532
4533 if ((flags & F_ALT) &&
4534 (type == 'x' || type == 'X')) {
4535 /* When converting under %#x or %#X, there are a number
4536 * of issues that cause pain:
4537 * - when 0 is being converted, the C standard leaves off
4538 * the '0x' or '0X', which is inconsistent with other
4539 * %#x/%#X conversions and inconsistent with Python's
4540 * hex() function
4541 * - there are platforms that violate the standard and
4542 * convert 0 with the '0x' or '0X'
4543 * (Metrowerks, Compaq Tru64)
4544 * - there are platforms that give '0x' when converting
4545 * under %#X, but convert 0 in accordance with the
4546 * standard (OS/2 EMX)
4547 *
4548 * We can achieve the desired consistency by inserting our
4549 * own '0x' or '0X' prefix, and substituting %x/%X in place
4550 * of %#x/%#X.
4551 *
4552 * Note that this is the same approach as used in
4553 * formatint() in unicodeobject.c
4554 */
4555 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4556 sign, type, prec, type);
4557 }
4558 else {
4559 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4560 sign, (flags&F_ALT) ? "#" : "",
4561 prec, type);
4562 }
4563
4564 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4565 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4566 */
4567 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4568 PyErr_SetString(PyExc_OverflowError,
4569 "formatted integer is too long (precision too large?)");
4570 return -1;
4571 }
4572 if (sign[0])
4573 PyOS_snprintf(buf, buflen, fmt, -x);
4574 else
4575 PyOS_snprintf(buf, buflen, fmt, x);
4576 return (int)strlen(buf);
4577}
4578
4579Py_LOCAL_INLINE(int)
4580formatchar(char *buf, size_t buflen, PyObject *v)
4581{
4582 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004583 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004584 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4585 return -1;
4586 }
4587 else {
4588 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4589 return -1;
4590 }
4591 buf[1] = '\0';
4592 return 1;
4593}
4594
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)

4604
4605PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004606PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004607{
4608 char *fmt, *res;
4609 Py_ssize_t arglen, argidx;
4610 Py_ssize_t reslen, rescnt, fmtcnt;
4611 int args_owned = 0;
4612 PyObject *result, *orig_args;
4613#ifdef Py_USING_UNICODE
4614 PyObject *v, *w;
4615#endif
4616 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004617 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004618 PyErr_BadInternalCall();
4619 return NULL;
4620 }
4621 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004622 fmt = PyString_AS_STRING(format);
4623 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004624 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004625 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004626 if (result == NULL)
4627 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004628 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004629 if (PyTuple_Check(args)) {
4630 arglen = PyTuple_GET_SIZE(args);
4631 argidx = 0;
4632 }
4633 else {
4634 arglen = -1;
4635 argidx = -2;
4636 }
4637 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4638 !PyObject_TypeCheck(args, &PyBaseString_Type))
4639 dict = args;
4640 while (--fmtcnt >= 0) {
4641 if (*fmt != '%') {
4642 if (--rescnt < 0) {
4643 rescnt = fmtcnt + 100;
4644 reslen += rescnt;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004645 if (_PyString_Resize(&result, reslen) < 0)
Christian Heimes44720832008-05-26 13:01:01 +00004646 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004647 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004648 + reslen - rescnt;
4649 --rescnt;
4650 }
4651 *res++ = *fmt++;
4652 }
4653 else {
4654 /* Got a format specifier */
4655 int flags = 0;
4656 Py_ssize_t width = -1;
4657 int prec = -1;
4658 int c = '\0';
4659 int fill;
4660 int isnumok;
4661 PyObject *v = NULL;
4662 PyObject *temp = NULL;
4663 char *pbuf;
4664 int sign;
4665 Py_ssize_t len;
4666 char formatbuf[FORMATBUFLEN];
4667 /* For format{float,int,char}() */
4668#ifdef Py_USING_UNICODE
4669 char *fmt_start = fmt;
4670 Py_ssize_t argidx_start = argidx;
4671#endif
4672
4673 fmt++;
4674 if (*fmt == '(') {
4675 char *keystart;
4676 Py_ssize_t keylen;
4677 PyObject *key;
4678 int pcount = 1;
4679
4680 if (dict == NULL) {
4681 PyErr_SetString(PyExc_TypeError,
4682 "format requires a mapping");
4683 goto error;
4684 }
4685 ++fmt;
4686 --fmtcnt;
4687 keystart = fmt;
4688 /* Skip over balanced parentheses */
4689 while (pcount > 0 && --fmtcnt >= 0) {
4690 if (*fmt == ')')
4691 --pcount;
4692 else if (*fmt == '(')
4693 ++pcount;
4694 fmt++;
4695 }
4696 keylen = fmt - keystart - 1;
4697 if (fmtcnt < 0 || pcount > 0) {
4698 PyErr_SetString(PyExc_ValueError,
4699 "incomplete format key");
4700 goto error;
4701 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004702 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004703 keylen);
4704 if (key == NULL)
4705 goto error;
4706 if (args_owned) {
4707 Py_DECREF(args);
4708 args_owned = 0;
4709 }
4710 args = PyObject_GetItem(dict, key);
4711 Py_DECREF(key);
4712 if (args == NULL) {
4713 goto error;
4714 }
4715 args_owned = 1;
4716 arglen = -1;
4717 argidx = -2;
4718 }
4719 while (--fmtcnt >= 0) {
4720 switch (c = *fmt++) {
4721 case '-': flags |= F_LJUST; continue;
4722 case '+': flags |= F_SIGN; continue;
4723 case ' ': flags |= F_BLANK; continue;
4724 case '#': flags |= F_ALT; continue;
4725 case '0': flags |= F_ZERO; continue;
4726 }
4727 break;
4728 }
4729 if (c == '*') {
4730 v = getnextarg(args, arglen, &argidx);
4731 if (v == NULL)
4732 goto error;
4733 if (!PyInt_Check(v)) {
4734 PyErr_SetString(PyExc_TypeError,
4735 "* wants int");
4736 goto error;
4737 }
4738 width = PyInt_AsLong(v);
4739 if (width < 0) {
4740 flags |= F_LJUST;
4741 width = -width;
4742 }
4743 if (--fmtcnt >= 0)
4744 c = *fmt++;
4745 }
4746 else if (c >= 0 && isdigit(c)) {
4747 width = c - '0';
4748 while (--fmtcnt >= 0) {
4749 c = Py_CHARMASK(*fmt++);
4750 if (!isdigit(c))
4751 break;
4752 if ((width*10) / 10 != width) {
4753 PyErr_SetString(
4754 PyExc_ValueError,
4755 "width too big");
4756 goto error;
4757 }
4758 width = width*10 + (c - '0');
4759 }
4760 }
4761 if (c == '.') {
4762 prec = 0;
4763 if (--fmtcnt >= 0)
4764 c = *fmt++;
4765 if (c == '*') {
4766 v = getnextarg(args, arglen, &argidx);
4767 if (v == NULL)
4768 goto error;
4769 if (!PyInt_Check(v)) {
4770 PyErr_SetString(
4771 PyExc_TypeError,
4772 "* wants int");
4773 goto error;
4774 }
4775 prec = PyInt_AsLong(v);
4776 if (prec < 0)
4777 prec = 0;
4778 if (--fmtcnt >= 0)
4779 c = *fmt++;
4780 }
4781 else if (c >= 0 && isdigit(c)) {
4782 prec = c - '0';
4783 while (--fmtcnt >= 0) {
4784 c = Py_CHARMASK(*fmt++);
4785 if (!isdigit(c))
4786 break;
4787 if ((prec*10) / 10 != prec) {
4788 PyErr_SetString(
4789 PyExc_ValueError,
4790 "prec too big");
4791 goto error;
4792 }
4793 prec = prec*10 + (c - '0');
4794 }
4795 }
4796 } /* prec */
4797 if (fmtcnt >= 0) {
4798 if (c == 'h' || c == 'l' || c == 'L') {
4799 if (--fmtcnt >= 0)
4800 c = *fmt++;
4801 }
4802 }
4803 if (fmtcnt < 0) {
4804 PyErr_SetString(PyExc_ValueError,
4805 "incomplete format");
4806 goto error;
4807 }
4808 if (c != '%') {
4809 v = getnextarg(args, arglen, &argidx);
4810 if (v == NULL)
4811 goto error;
4812 }
4813 sign = 0;
4814 fill = ' ';
4815 switch (c) {
4816 case '%':
4817 pbuf = "%";
4818 len = 1;
4819 break;
4820 case 's':
4821#ifdef Py_USING_UNICODE
4822 if (PyUnicode_Check(v)) {
4823 fmt = fmt_start;
4824 argidx = argidx_start;
4825 goto unicode;
4826 }
4827#endif
4828 temp = _PyObject_Str(v);
4829#ifdef Py_USING_UNICODE
4830 if (temp != NULL && PyUnicode_Check(temp)) {
4831 Py_DECREF(temp);
4832 fmt = fmt_start;
4833 argidx = argidx_start;
4834 goto unicode;
4835 }
4836#endif
4837 /* Fall through */
4838 case 'r':
4839 if (c == 'r')
4840 temp = PyObject_Repr(v);
4841 if (temp == NULL)
4842 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004843 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004844 PyErr_SetString(PyExc_TypeError,
4845 "%s argument has non-string str()");
4846 Py_DECREF(temp);
4847 goto error;
4848 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004849 pbuf = PyString_AS_STRING(temp);
4850 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004851 if (prec >= 0 && len > prec)
4852 len = prec;
4853 break;
4854 case 'i':
4855 case 'd':
4856 case 'u':
4857 case 'o':
4858 case 'x':
4859 case 'X':
4860 if (c == 'i')
4861 c = 'd';
4862 isnumok = 0;
4863 if (PyNumber_Check(v)) {
4864 PyObject *iobj=NULL;
4865
4866 if (PyInt_Check(v) || (PyLong_Check(v))) {
4867 iobj = v;
4868 Py_INCREF(iobj);
4869 }
4870 else {
4871 iobj = PyNumber_Int(v);
4872 if (iobj==NULL) iobj = PyNumber_Long(v);
4873 }
4874 if (iobj!=NULL) {
4875 if (PyInt_Check(iobj)) {
4876 isnumok = 1;
4877 pbuf = formatbuf;
4878 len = formatint(pbuf,
4879 sizeof(formatbuf),
4880 flags, prec, c, iobj);
4881 Py_DECREF(iobj);
4882 if (len < 0)
4883 goto error;
4884 sign = 1;
4885 }
4886 else if (PyLong_Check(iobj)) {
4887 int ilen;
4888
4889 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004890 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004891 prec, c, &pbuf, &ilen);
4892 Py_DECREF(iobj);
4893 len = ilen;
4894 if (!temp)
4895 goto error;
4896 sign = 1;
4897 }
4898 else {
4899 Py_DECREF(iobj);
4900 }
4901 }
4902 }
4903 if (!isnumok) {
4904 PyErr_Format(PyExc_TypeError,
4905 "%%%c format: a number is required, "
4906 "not %.200s", c, Py_TYPE(v)->tp_name);
4907 goto error;
4908 }
4909 if (flags & F_ZERO)
4910 fill = '0';
4911 break;
4912 case 'e':
4913 case 'E':
4914 case 'f':
4915 case 'F':
4916 case 'g':
4917 case 'G':
Christian Heimes44720832008-05-26 13:01:01 +00004918 pbuf = formatbuf;
4919 len = formatfloat(pbuf, sizeof(formatbuf),
4920 flags, prec, c, v);
4921 if (len < 0)
4922 goto error;
4923 sign = 1;
4924 if (flags & F_ZERO)
4925 fill = '0';
4926 break;
4927 case 'c':
4928#ifdef Py_USING_UNICODE
4929 if (PyUnicode_Check(v)) {
4930 fmt = fmt_start;
4931 argidx = argidx_start;
4932 goto unicode;
4933 }
4934#endif
4935 pbuf = formatbuf;
4936 len = formatchar(pbuf, sizeof(formatbuf), v);
4937 if (len < 0)
4938 goto error;
4939 break;
4940 default:
4941 PyErr_Format(PyExc_ValueError,
4942 "unsupported format character '%c' (0x%x) "
4943 "at index %zd",
4944 c, c,
4945 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004946 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00004947 goto error;
4948 }
4949 if (sign) {
4950 if (*pbuf == '-' || *pbuf == '+') {
4951 sign = *pbuf++;
4952 len--;
4953 }
4954 else if (flags & F_SIGN)
4955 sign = '+';
4956 else if (flags & F_BLANK)
4957 sign = ' ';
4958 else
4959 sign = 0;
4960 }
4961 if (width < len)
4962 width = len;
4963 if (rescnt - (sign != 0) < width) {
4964 reslen -= rescnt;
4965 rescnt = width + fmtcnt + 100;
4966 reslen += rescnt;
4967 if (reslen < 0) {
4968 Py_DECREF(result);
4969 Py_XDECREF(temp);
4970 return PyErr_NoMemory();
4971 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004972 if (_PyString_Resize(&result, reslen) < 0) {
Christian Heimes44720832008-05-26 13:01:01 +00004973 Py_XDECREF(temp);
4974 return NULL;
4975 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004976 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004977 + reslen - rescnt;
4978 }
4979 if (sign) {
4980 if (fill != ' ')
4981 *res++ = sign;
4982 rescnt--;
4983 if (width > len)
4984 width--;
4985 }
4986 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4987 assert(pbuf[0] == '0');
4988 assert(pbuf[1] == c);
4989 if (fill != ' ') {
4990 *res++ = *pbuf++;
4991 *res++ = *pbuf++;
4992 }
4993 rescnt -= 2;
4994 width -= 2;
4995 if (width < 0)
4996 width = 0;
4997 len -= 2;
4998 }
4999 if (width > len && !(flags & F_LJUST)) {
5000 do {
5001 --rescnt;
5002 *res++ = fill;
5003 } while (--width > len);
5004 }
5005 if (fill == ' ') {
5006 if (sign)
5007 *res++ = sign;
5008 if ((flags & F_ALT) &&
5009 (c == 'x' || c == 'X')) {
5010 assert(pbuf[0] == '0');
5011 assert(pbuf[1] == c);
5012 *res++ = *pbuf++;
5013 *res++ = *pbuf++;
5014 }
5015 }
5016 Py_MEMCPY(res, pbuf, len);
5017 res += len;
5018 rescnt -= len;
5019 while (--width >= len) {
5020 --rescnt;
5021 *res++ = ' ';
5022 }
5023 if (dict && (argidx < arglen) && c != '%') {
5024 PyErr_SetString(PyExc_TypeError,
5025 "not all arguments converted during string formatting");
5026 Py_XDECREF(temp);
5027 goto error;
5028 }
5029 Py_XDECREF(temp);
5030 } /* '%' */
5031 } /* until end */
5032 if (argidx < arglen && !dict) {
5033 PyErr_SetString(PyExc_TypeError,
5034 "not all arguments converted during string formatting");
5035 goto error;
5036 }
5037 if (args_owned) {
5038 Py_DECREF(args);
5039 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005040 _PyString_Resize(&result, reslen - rescnt);
Christian Heimes44720832008-05-26 13:01:01 +00005041 return result;
5042
5043#ifdef Py_USING_UNICODE
5044 unicode:
5045 if (args_owned) {
5046 Py_DECREF(args);
5047 args_owned = 0;
5048 }
5049 /* Fiddle args right (remove the first argidx arguments) */
5050 if (PyTuple_Check(orig_args) && argidx > 0) {
5051 PyObject *v;
5052 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5053 v = PyTuple_New(n);
5054 if (v == NULL)
5055 goto error;
5056 while (--n >= 0) {
5057 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5058 Py_INCREF(w);
5059 PyTuple_SET_ITEM(v, n, w);
5060 }
5061 args = v;
5062 } else {
5063 Py_INCREF(orig_args);
5064 args = orig_args;
5065 }
5066 args_owned = 1;
5067 /* Take what we have of the result and let the Unicode formatting
5068 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005069 rescnt = res - PyString_AS_STRING(result);
5070 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00005071 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005072 fmtcnt = PyString_GET_SIZE(format) - \
5073 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00005074 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5075 if (format == NULL)
5076 goto error;
5077 v = PyUnicode_Format(format, args);
5078 Py_DECREF(format);
5079 if (v == NULL)
5080 goto error;
5081 /* Paste what we have (result) to what the Unicode formatting
5082 function returned (v) and return the result (or error) */
5083 w = PyUnicode_Concat(result, v);
5084 Py_DECREF(result);
5085 Py_DECREF(v);
5086 Py_DECREF(args);
5087 return w;
5088#endif /* Py_USING_UNICODE */
5089
5090 error:
5091 Py_DECREF(result);
5092 if (args_owned) {
5093 Py_DECREF(args);
5094 }
5095 return NULL;
5096}
5097
5098void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005099PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005100{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005101 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00005102 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005103 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005104 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00005105 /* If it's a string subclass, we don't really know what putting
5106 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005107 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00005108 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005109 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00005110 return;
5111 if (interned == NULL) {
5112 interned = PyDict_New();
5113 if (interned == NULL) {
5114 PyErr_Clear(); /* Don't leave an exception */
5115 return;
5116 }
5117 }
5118 t = PyDict_GetItem(interned, (PyObject *)s);
5119 if (t) {
5120 Py_INCREF(t);
5121 Py_DECREF(*p);
5122 *p = t;
5123 return;
5124 }
5125
5126 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5127 PyErr_Clear();
5128 return;
5129 }
5130 /* The two references in interned are not counted by refcnt.
5131 The string deallocator will take care of this */
5132 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005133 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005134}
5135
5136void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005137PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005138{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005139 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005140 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5141 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005142 Py_INCREF(*p);
5143 }
5144}
5145
5146
5147PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005148PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005149{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005150 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00005151 if (s == NULL)
5152 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005153 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00005154 return s;
5155}
5156
5157void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005158PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005159{
5160 int i;
5161 for (i = 0; i < UCHAR_MAX + 1; i++) {
5162 Py_XDECREF(characters[i]);
5163 characters[i] = NULL;
5164 }
5165 Py_XDECREF(nullstring);
5166 nullstring = NULL;
5167}
5168
5169void _Py_ReleaseInternedStrings(void)
5170{
5171 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005172 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00005173 Py_ssize_t i, n;
5174 Py_ssize_t immortal_size = 0, mortal_size = 0;
5175
5176 if (interned == NULL || !PyDict_Check(interned))
5177 return;
5178 keys = PyDict_Keys(interned);
5179 if (keys == NULL || !PyList_Check(keys)) {
5180 PyErr_Clear();
5181 return;
5182 }
5183
5184 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5185 detector, interned strings are not forcibly deallocated; rather, we
5186 give them their stolen references back, and then clear and DECREF
5187 the interned dict. */
5188
5189 n = PyList_GET_SIZE(keys);
5190 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5191 n);
5192 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005193 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00005194 switch (s->ob_sstate) {
5195 case SSTATE_NOT_INTERNED:
5196 /* XXX Shouldn't happen */
5197 break;
5198 case SSTATE_INTERNED_IMMORTAL:
5199 Py_REFCNT(s) += 1;
5200 immortal_size += Py_SIZE(s);
5201 break;
5202 case SSTATE_INTERNED_MORTAL:
5203 Py_REFCNT(s) += 2;
5204 mortal_size += Py_SIZE(s);
5205 break;
5206 default:
5207 Py_FatalError("Inconsistent interned string state.");
5208 }
5209 s->ob_sstate = SSTATE_NOT_INTERNED;
5210 }
5211 fprintf(stderr, "total size of all interned strings: "
5212 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5213 "mortal/immortal\n", mortal_size, immortal_size);
5214 Py_DECREF(keys);
5215 PyDict_Clear(interned);
5216 Py_DECREF(interned);
5217 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005218}