blob: 6d53a099f87ac9f311151173b6c06086ef004f8b [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
7
8#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000012static PyStringObject *characters[UCHAR_MAX + 1];
13static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000014
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
23static PyObject *interned;
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
51PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000053{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000054 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000055 if (size < 0) {
56 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000058 return NULL;
59 }
60 if (size == 0 && (op = nullstring) != NULL) {
61#ifdef COUNT_ALLOCS
62 null_strings++;
63#endif
64 Py_INCREF(op);
65 return (PyObject *)op;
66 }
67 if (size == 1 && str != NULL &&
68 (op = characters[*str & UCHAR_MAX]) != NULL)
69 {
70#ifdef COUNT_ALLOCS
71 one_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76
Neal Norwitze7d8be82008-07-31 17:17:14 +000077 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
78 PyErr_SetString(PyExc_OverflowError, "string is too large");
79 return NULL;
80 }
81
Christian Heimes44720832008-05-26 13:01:01 +000082 /* Inline PyObject_NewVar */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000083 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Christian Heimes44720832008-05-26 13:01:01 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000086 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000087 op->ob_shash = -1;
88 op->ob_sstate = SSTATE_NOT_INTERNED;
89 if (str != NULL)
90 Py_MEMCPY(op->ob_sval, str, size);
91 op->ob_sval[size] = '\0';
92 /* share short strings */
93 if (size == 0) {
94 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +000095 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000096 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +000097 nullstring = op;
98 Py_INCREF(op);
99 } else if (size == 1 && str != NULL) {
100 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000101 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000102 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000103 characters[*str & UCHAR_MAX] = op;
104 Py_INCREF(op);
105 }
106 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000107}
108
Christian Heimes44720832008-05-26 13:01:01 +0000109PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000110PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000111{
Christian Heimes44720832008-05-26 13:01:01 +0000112 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000113 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000114
115 assert(str != NULL);
116 size = strlen(str);
Neal Norwitze7d8be82008-07-31 17:17:14 +0000117 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
Christian Heimes44720832008-05-26 13:01:01 +0000118 PyErr_SetString(PyExc_OverflowError,
119 "string is too long for a Python string");
120 return NULL;
121 }
122 if (size == 0 && (op = nullstring) != NULL) {
123#ifdef COUNT_ALLOCS
124 null_strings++;
125#endif
126 Py_INCREF(op);
127 return (PyObject *)op;
128 }
129 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
130#ifdef COUNT_ALLOCS
131 one_strings++;
132#endif
133 Py_INCREF(op);
134 return (PyObject *)op;
135 }
136
137 /* Inline PyObject_NewVar */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000138 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Christian Heimes44720832008-05-26 13:01:01 +0000139 if (op == NULL)
140 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000141 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000142 op->ob_shash = -1;
143 op->ob_sstate = SSTATE_NOT_INTERNED;
144 Py_MEMCPY(op->ob_sval, str, size+1);
145 /* share short strings */
146 if (size == 0) {
147 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000148 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000149 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000150 nullstring = op;
151 Py_INCREF(op);
152 } else if (size == 1) {
153 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000154 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000155 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000156 characters[*str & UCHAR_MAX] = op;
157 Py_INCREF(op);
158 }
159 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000160}
161
Christian Heimes44720832008-05-26 13:01:01 +0000162PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000163PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000164{
Christian Heimes44720832008-05-26 13:01:01 +0000165 va_list count;
166 Py_ssize_t n = 0;
167 const char* f;
168 char *s;
169 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000170
Christian Heimes44720832008-05-26 13:01:01 +0000171#ifdef VA_LIST_IS_ARRAY
172 Py_MEMCPY(count, vargs, sizeof(va_list));
173#else
174#ifdef __va_copy
175 __va_copy(count, vargs);
176#else
177 count = vargs;
178#endif
179#endif
180 /* step 1: figure out how large a buffer we need */
181 for (f = format; *f; f++) {
182 if (*f == '%') {
183 const char* p = f;
184 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
185 ;
186
187 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
188 * they don't affect the amount of space we reserve.
189 */
190 if ((*f == 'l' || *f == 'z') &&
191 (f[1] == 'd' || f[1] == 'u'))
192 ++f;
193
194 switch (*f) {
195 case 'c':
196 (void)va_arg(count, int);
197 /* fall through... */
198 case '%':
199 n++;
200 break;
201 case 'd': case 'u': case 'i': case 'x':
202 (void) va_arg(count, int);
203 /* 20 bytes is enough to hold a 64-bit
204 integer. Decimal takes the most space.
205 This isn't enough for octal. */
206 n += 20;
207 break;
208 case 's':
209 s = va_arg(count, char*);
210 n += strlen(s);
211 break;
212 case 'p':
213 (void) va_arg(count, int);
214 /* maximum 64-bit pointer representation:
215 * 0xffffffffffffffff
216 * so 19 characters is enough.
217 * XXX I count 18 -- what's the extra for?
218 */
219 n += 19;
220 break;
221 default:
222 /* if we stumble upon an unknown
223 formatting code, copy the rest of
224 the format string to the output
225 string. (we cannot just skip the
226 code, since there's no way to know
227 what's in the argument list) */
228 n += strlen(p);
229 goto expand;
230 }
231 } else
232 n++;
233 }
234 expand:
235 /* step 2: fill the buffer */
236 /* Since we've analyzed how much space we need for the worst case,
237 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000238 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000239 if (!string)
240 return NULL;
241
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000242 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000243
244 for (f = format; *f; f++) {
245 if (*f == '%') {
246 const char* p = f++;
247 Py_ssize_t i;
248 int longflag = 0;
249 int size_tflag = 0;
250 /* parse the width.precision part (we're only
251 interested in the precision value, if any) */
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 if (*f == '.') {
256 f++;
257 n = 0;
258 while (isdigit(Py_CHARMASK(*f)))
259 n = (n*10) + *f++ - '0';
260 }
261 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
262 f++;
263 /* handle the long flag, but only for %ld and %lu.
264 others can be added when necessary. */
265 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
266 longflag = 1;
267 ++f;
268 }
269 /* handle the size_t flag. */
270 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
271 size_tflag = 1;
272 ++f;
273 }
274
275 switch (*f) {
276 case 'c':
277 *s++ = va_arg(vargs, int);
278 break;
279 case 'd':
280 if (longflag)
281 sprintf(s, "%ld", va_arg(vargs, long));
282 else if (size_tflag)
283 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
284 va_arg(vargs, Py_ssize_t));
285 else
286 sprintf(s, "%d", va_arg(vargs, int));
287 s += strlen(s);
288 break;
289 case 'u':
290 if (longflag)
291 sprintf(s, "%lu",
292 va_arg(vargs, unsigned long));
293 else if (size_tflag)
294 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
295 va_arg(vargs, size_t));
296 else
297 sprintf(s, "%u",
298 va_arg(vargs, unsigned int));
299 s += strlen(s);
300 break;
301 case 'i':
302 sprintf(s, "%i", va_arg(vargs, int));
303 s += strlen(s);
304 break;
305 case 'x':
306 sprintf(s, "%x", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 's':
310 p = va_arg(vargs, char*);
311 i = strlen(p);
312 if (n > 0 && i > n)
313 i = n;
314 Py_MEMCPY(s, p, i);
315 s += i;
316 break;
317 case 'p':
318 sprintf(s, "%p", va_arg(vargs, void*));
319 /* %p is ill-defined: ensure leading 0x. */
320 if (s[1] == 'X')
321 s[1] = 'x';
322 else if (s[1] != 'x') {
323 memmove(s+2, s, strlen(s)+1);
324 s[0] = '0';
325 s[1] = 'x';
326 }
327 s += strlen(s);
328 break;
329 case '%':
330 *s++ = '%';
331 break;
332 default:
333 strcpy(s, p);
334 s += strlen(s);
335 goto end;
336 }
337 } else
338 *s++ = *f;
339 }
340
341 end:
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000342 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Christian Heimes44720832008-05-26 13:01:01 +0000343 return string;
344}
345
346PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000347PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000348{
349 PyObject* ret;
350 va_list vargs;
351
352#ifdef HAVE_STDARG_PROTOTYPES
353 va_start(vargs, format);
354#else
355 va_start(vargs);
356#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000357 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000358 va_end(vargs);
359 return ret;
360}
361
362
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000363PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000364 Py_ssize_t size,
365 const char *encoding,
366 const char *errors)
367{
368 PyObject *v, *str;
369
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000370 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000371 if (str == NULL)
372 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000373 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000374 Py_DECREF(str);
375 return v;
376}
377
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000378PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000379 const char *encoding,
380 const char *errors)
381{
382 PyObject *v;
383
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000384 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000385 PyErr_BadArgument();
386 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000387 }
388
Christian Heimes44720832008-05-26 13:01:01 +0000389 if (encoding == NULL) {
390#ifdef Py_USING_UNICODE
391 encoding = PyUnicode_GetDefaultEncoding();
392#else
393 PyErr_SetString(PyExc_ValueError, "no encoding specified");
394 goto onError;
395#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000396 }
Christian Heimes44720832008-05-26 13:01:01 +0000397
398 /* Decode via the codec registry */
399 v = PyCodec_Decode(str, encoding, errors);
400 if (v == NULL)
401 goto onError;
402
403 return v;
404
405 onError:
406 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000407}
408
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000409PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000410 const char *encoding,
411 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000412{
Christian Heimes44720832008-05-26 13:01:01 +0000413 PyObject *v;
414
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000415 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000416 if (v == NULL)
417 goto onError;
418
419#ifdef Py_USING_UNICODE
420 /* Convert Unicode to a string using the default encoding */
421 if (PyUnicode_Check(v)) {
422 PyObject *temp = v;
423 v = PyUnicode_AsEncodedString(v, NULL, NULL);
424 Py_DECREF(temp);
425 if (v == NULL)
426 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000427 }
Christian Heimes44720832008-05-26 13:01:01 +0000428#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000429 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000430 PyErr_Format(PyExc_TypeError,
431 "decoder did not return a string object (type=%.400s)",
432 Py_TYPE(v)->tp_name);
433 Py_DECREF(v);
434 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000435 }
Christian Heimes44720832008-05-26 13:01:01 +0000436
437 return v;
438
439 onError:
440 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000441}
442
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000443PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000444 Py_ssize_t size,
445 const char *encoding,
446 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000447{
Christian Heimes44720832008-05-26 13:01:01 +0000448 PyObject *v, *str;
449
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000450 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000451 if (str == NULL)
452 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000453 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000454 Py_DECREF(str);
455 return v;
456}
457
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000458PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000459 const char *encoding,
460 const char *errors)
461{
462 PyObject *v;
463
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000464 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000465 PyErr_BadArgument();
466 goto onError;
467 }
468
469 if (encoding == NULL) {
470#ifdef Py_USING_UNICODE
471 encoding = PyUnicode_GetDefaultEncoding();
472#else
473 PyErr_SetString(PyExc_ValueError, "no encoding specified");
474 goto onError;
475#endif
476 }
477
478 /* Encode via the codec registry */
479 v = PyCodec_Encode(str, encoding, errors);
480 if (v == NULL)
481 goto onError;
482
483 return v;
484
485 onError:
486 return NULL;
487}
488
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000489PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000490 const char *encoding,
491 const char *errors)
492{
493 PyObject *v;
494
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000495 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000496 if (v == NULL)
497 goto onError;
498
499#ifdef Py_USING_UNICODE
500 /* Convert Unicode to a string using the default encoding */
501 if (PyUnicode_Check(v)) {
502 PyObject *temp = v;
503 v = PyUnicode_AsEncodedString(v, NULL, NULL);
504 Py_DECREF(temp);
505 if (v == NULL)
506 goto onError;
507 }
508#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000509 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000510 PyErr_Format(PyExc_TypeError,
511 "encoder did not return a string object (type=%.400s)",
512 Py_TYPE(v)->tp_name);
513 Py_DECREF(v);
514 goto onError;
515 }
516
517 return v;
518
519 onError:
520 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000521}
522
523static void
Christian Heimes44720832008-05-26 13:01:01 +0000524string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000525{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000526 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000527 case SSTATE_NOT_INTERNED:
528 break;
529
530 case SSTATE_INTERNED_MORTAL:
531 /* revive dead object temporarily for DelItem */
532 Py_REFCNT(op) = 3;
533 if (PyDict_DelItem(interned, op) != 0)
534 Py_FatalError(
535 "deletion of interned string failed");
536 break;
537
538 case SSTATE_INTERNED_IMMORTAL:
539 Py_FatalError("Immortal interned string died.");
540
541 default:
542 Py_FatalError("Inconsistent interned string state.");
543 }
544 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000545}
546
Christian Heimes44720832008-05-26 13:01:01 +0000547/* Unescape a backslash-escaped string. If unicode is non-zero,
548 the string is a u-literal. If recode_encoding is non-zero,
549 the string is UTF-8 encoded and should be re-encoded in the
550 specified encoding. */
551
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000552PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000553 Py_ssize_t len,
554 const char *errors,
555 Py_ssize_t unicode,
556 const char *recode_encoding)
557{
558 int c;
559 char *p, *buf;
560 const char *end;
561 PyObject *v;
562 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000563 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000564 if (v == NULL)
565 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000566 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000567 end = s + len;
568 while (s < end) {
569 if (*s != '\\') {
570 non_esc:
571#ifdef Py_USING_UNICODE
572 if (recode_encoding && (*s & 0x80)) {
573 PyObject *u, *w;
574 char *r;
575 const char* t;
576 Py_ssize_t rn;
577 t = s;
578 /* Decode non-ASCII bytes as UTF-8. */
579 while (t < end && (*t & 0x80)) t++;
580 u = PyUnicode_DecodeUTF8(s, t - s, errors);
581 if(!u) goto failed;
582
583 /* Recode them in target encoding. */
584 w = PyUnicode_AsEncodedString(
585 u, recode_encoding, errors);
586 Py_DECREF(u);
587 if (!w) goto failed;
588
589 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000590 assert(PyString_Check(w));
591 r = PyString_AS_STRING(w);
592 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000593 Py_MEMCPY(p, r, rn);
594 p += rn;
595 Py_DECREF(w);
596 s = t;
597 } else {
598 *p++ = *s++;
599 }
600#else
601 *p++ = *s++;
602#endif
603 continue;
604 }
605 s++;
606 if (s==end) {
607 PyErr_SetString(PyExc_ValueError,
608 "Trailing \\ in string");
609 goto failed;
610 }
611 switch (*s++) {
612 /* XXX This assumes ASCII! */
613 case '\n': break;
614 case '\\': *p++ = '\\'; break;
615 case '\'': *p++ = '\''; break;
616 case '\"': *p++ = '\"'; break;
617 case 'b': *p++ = '\b'; break;
618 case 'f': *p++ = '\014'; break; /* FF */
619 case 't': *p++ = '\t'; break;
620 case 'n': *p++ = '\n'; break;
621 case 'r': *p++ = '\r'; break;
622 case 'v': *p++ = '\013'; break; /* VT */
623 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
624 case '0': case '1': case '2': case '3':
625 case '4': case '5': case '6': case '7':
626 c = s[-1] - '0';
627 if (s < end && '0' <= *s && *s <= '7') {
628 c = (c<<3) + *s++ - '0';
629 if (s < end && '0' <= *s && *s <= '7')
630 c = (c<<3) + *s++ - '0';
631 }
632 *p++ = c;
633 break;
634 case 'x':
635 if (s+1 < end &&
636 isxdigit(Py_CHARMASK(s[0])) &&
637 isxdigit(Py_CHARMASK(s[1])))
638 {
639 unsigned int x = 0;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x = c - '0';
644 else if (islower(c))
645 x = 10 + c - 'a';
646 else
647 x = 10 + c - 'A';
648 x = x << 4;
649 c = Py_CHARMASK(*s);
650 s++;
651 if (isdigit(c))
652 x += c - '0';
653 else if (islower(c))
654 x += 10 + c - 'a';
655 else
656 x += 10 + c - 'A';
657 *p++ = x;
658 break;
659 }
660 if (!errors || strcmp(errors, "strict") == 0) {
661 PyErr_SetString(PyExc_ValueError,
662 "invalid \\x escape");
663 goto failed;
664 }
665 if (strcmp(errors, "replace") == 0) {
666 *p++ = '?';
667 } else if (strcmp(errors, "ignore") == 0)
668 /* do nothing */;
669 else {
670 PyErr_Format(PyExc_ValueError,
671 "decoding error; "
672 "unknown error handling code: %.400s",
673 errors);
674 goto failed;
675 }
676#ifndef Py_USING_UNICODE
677 case 'u':
678 case 'U':
679 case 'N':
680 if (unicode) {
681 PyErr_SetString(PyExc_ValueError,
682 "Unicode escapes not legal "
683 "when Unicode disabled");
684 goto failed;
685 }
686#endif
687 default:
688 *p++ = '\\';
689 s--;
690 goto non_esc; /* an arbitry number of unescaped
691 UTF-8 bytes may follow. */
692 }
693 }
694 if (p-buf < newlen)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000695 _PyString_Resize(&v, p - buf);
Christian Heimes44720832008-05-26 13:01:01 +0000696 return v;
697 failed:
698 Py_DECREF(v);
699 return NULL;
700}
701
702/* -------------------------------------------------------------------- */
703/* object api */
704
Christian Heimes1a6387e2008-03-26 12:49:49 +0000705static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000706string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000707{
Christian Heimes44720832008-05-26 13:01:01 +0000708 char *s;
709 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000711 return -1;
712 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000713}
714
Christian Heimes44720832008-05-26 13:01:01 +0000715static /*const*/ char *
716string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000717{
Christian Heimes44720832008-05-26 13:01:01 +0000718 char *s;
719 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000720 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000721 return NULL;
722 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000723}
724
725Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000726PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000727{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000728 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000729 return string_getsize(op);
730 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000731}
732
Christian Heimes44720832008-05-26 13:01:01 +0000733/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000734PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000735{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000736 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000737 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000738 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000739}
740
741int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000742PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000743 register char **s,
744 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000745{
Christian Heimes44720832008-05-26 13:01:01 +0000746 if (s == NULL) {
747 PyErr_BadInternalCall();
748 return -1;
749 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000750
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000751 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000752#ifdef Py_USING_UNICODE
753 if (PyUnicode_Check(obj)) {
754 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
755 if (obj == NULL)
756 return -1;
757 }
758 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000759#endif
Christian Heimes44720832008-05-26 13:01:01 +0000760 {
761 PyErr_Format(PyExc_TypeError,
762 "expected string or Unicode object, "
763 "%.200s found", Py_TYPE(obj)->tp_name);
764 return -1;
765 }
766 }
767
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000768 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000769 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000770 *len = PyString_GET_SIZE(obj);
771 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000772 PyErr_SetString(PyExc_TypeError,
773 "expected string without null bytes");
774 return -1;
775 }
776 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000777}
778
Christian Heimes1a6387e2008-03-26 12:49:49 +0000779/* -------------------------------------------------------------------- */
780/* Methods */
781
Christian Heimes44720832008-05-26 13:01:01 +0000782#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000784
Christian Heimes1a6387e2008-03-26 12:49:49 +0000785#include "stringlib/count.h"
786#include "stringlib/find.h"
787#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000789#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000790#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792
793
794static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796{
Christian Heimes44720832008-05-26 13:01:01 +0000797 Py_ssize_t i, str_len;
798 char c;
799 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000800
Christian Heimes44720832008-05-26 13:01:01 +0000801 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000802 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000803 int ret;
804 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000805 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000806 if (op == NULL)
807 return -1;
808 ret = string_print(op, fp, flags);
809 Py_DECREF(op);
810 return ret;
811 }
812 if (flags & Py_PRINT_RAW) {
813 char *data = op->ob_sval;
814 Py_ssize_t size = Py_SIZE(op);
815 Py_BEGIN_ALLOW_THREADS
816 while (size > INT_MAX) {
817 /* Very long strings cannot be written atomically.
818 * But don't write exactly INT_MAX bytes at a time
819 * to avoid memory aligment issues.
820 */
821 const int chunk_size = INT_MAX & ~0x3FFF;
822 fwrite(data, 1, chunk_size, fp);
823 data += chunk_size;
824 size -= chunk_size;
825 }
826#ifdef __VMS
827 if (size) fwrite(data, (int)size, 1, fp);
828#else
829 fwrite(data, 1, (int)size, fp);
830#endif
831 Py_END_ALLOW_THREADS
832 return 0;
833 }
834
835 /* figure out which quote to use; single is preferred */
836 quote = '\'';
837 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
838 !memchr(op->ob_sval, '"', Py_SIZE(op)))
839 quote = '"';
840
841 str_len = Py_SIZE(op);
842 Py_BEGIN_ALLOW_THREADS
843 fputc(quote, fp);
844 for (i = 0; i < str_len; i++) {
845 /* Since strings are immutable and the caller should have a
846 reference, accessing the interal buffer should not be an issue
847 with the GIL released. */
848 c = op->ob_sval[i];
849 if (c == quote || c == '\\')
850 fprintf(fp, "\\%c", c);
851 else if (c == '\t')
852 fprintf(fp, "\\t");
853 else if (c == '\n')
854 fprintf(fp, "\\n");
855 else if (c == '\r')
856 fprintf(fp, "\\r");
857 else if (c < ' ' || c >= 0x7f)
858 fprintf(fp, "\\x%02x", c & 0xff);
859 else
860 fputc(c, fp);
861 }
862 fputc(quote, fp);
863 Py_END_ALLOW_THREADS
864 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000865}
866
Christian Heimes44720832008-05-26 13:01:01 +0000867PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000868PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000869{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000870 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000871 size_t newsize = 2 + 4 * Py_SIZE(op);
872 PyObject *v;
873 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
874 PyErr_SetString(PyExc_OverflowError,
875 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000876 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000877 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000878 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000879 if (v == NULL) {
880 return NULL;
881 }
882 else {
883 register Py_ssize_t i;
884 register char c;
885 register char *p;
886 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000887
Christian Heimes44720832008-05-26 13:01:01 +0000888 /* figure out which quote to use; single is preferred */
889 quote = '\'';
890 if (smartquotes &&
891 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
894
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000895 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000896 *p++ = quote;
897 for (i = 0; i < Py_SIZE(op); i++) {
898 /* There's at least enough room for a hex escape
899 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000900 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000901 c = op->ob_sval[i];
902 if (c == quote || c == '\\')
903 *p++ = '\\', *p++ = c;
904 else if (c == '\t')
905 *p++ = '\\', *p++ = 't';
906 else if (c == '\n')
907 *p++ = '\\', *p++ = 'n';
908 else if (c == '\r')
909 *p++ = '\\', *p++ = 'r';
910 else if (c < ' ' || c >= 0x7f) {
911 /* For performance, we don't want to call
912 PyOS_snprintf here (extra layers of
913 function call). */
914 sprintf(p, "\\x%02x", c & 0xff);
915 p += 4;
916 }
917 else
918 *p++ = c;
919 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000920 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000921 *p++ = quote;
922 *p = '\0';
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000923 _PyString_Resize(
924 &v, (p - PyString_AS_STRING(v)));
Christian Heimes44720832008-05-26 13:01:01 +0000925 return v;
926 }
927}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000928
929static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000930string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000931{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000932 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000933}
934
Christian Heimes1a6387e2008-03-26 12:49:49 +0000935static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000936string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000937{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000938 assert(PyString_Check(s));
939 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000940 Py_INCREF(s);
941 return s;
942 }
943 else {
944 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000945 PyStringObject *t = (PyStringObject *) s;
946 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +0000947 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000948}
949
Christian Heimes44720832008-05-26 13:01:01 +0000950static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000951string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +0000952{
953 return Py_SIZE(a);
954}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000955
Christian Heimes44720832008-05-26 13:01:01 +0000956static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000957string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +0000958{
959 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000960 register PyStringObject *op;
961 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +0000962#ifdef Py_USING_UNICODE
963 if (PyUnicode_Check(bb))
964 return PyUnicode_Concat((PyObject *)a, bb);
965#endif
966 if (PyByteArray_Check(bb))
967 return PyByteArray_Concat((PyObject *)a, bb);
968 PyErr_Format(PyExc_TypeError,
969 "cannot concatenate 'str' and '%.200s' objects",
970 Py_TYPE(bb)->tp_name);
971 return NULL;
972 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000973#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +0000974 /* Optimize cases with empty left or right operand */
975 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000976 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +0000977 if (Py_SIZE(a) == 0) {
978 Py_INCREF(bb);
979 return bb;
980 }
981 Py_INCREF(a);
982 return (PyObject *)a;
983 }
984 size = Py_SIZE(a) + Py_SIZE(b);
Neal Norwitze7d8be82008-07-31 17:17:14 +0000985 /* Check that string sizes are not negative, to prevent an
986 overflow in cases where we are passed incorrectly-created
987 strings with negative lengths (due to a bug in other code).
988 */
989 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
990 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
Christian Heimes44720832008-05-26 13:01:01 +0000991 PyErr_SetString(PyExc_OverflowError,
992 "strings are too large to concat");
993 return NULL;
994 }
995
996 /* Inline PyObject_NewVar */
Neal Norwitze7d8be82008-07-31 17:17:14 +0000997 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
998 PyErr_SetString(PyExc_OverflowError,
999 "strings are too large to concat");
1000 return NULL;
1001 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001002 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Christian Heimes44720832008-05-26 13:01:01 +00001003 if (op == NULL)
1004 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001005 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001006 op->ob_shash = -1;
1007 op->ob_sstate = SSTATE_NOT_INTERNED;
1008 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1009 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1010 op->ob_sval[size] = '\0';
1011 return (PyObject *) op;
1012#undef b
1013}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001014
Christian Heimes44720832008-05-26 13:01:01 +00001015static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001016string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001017{
1018 register Py_ssize_t i;
1019 register Py_ssize_t j;
1020 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001021 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001022 size_t nbytes;
1023 if (n < 0)
1024 n = 0;
1025 /* watch out for overflows: the size can overflow int,
1026 * and the # of bytes needed can overflow size_t
1027 */
1028 size = Py_SIZE(a) * n;
1029 if (n && size / n != Py_SIZE(a)) {
1030 PyErr_SetString(PyExc_OverflowError,
1031 "repeated string is too long");
1032 return NULL;
1033 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001034 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 nbytes = (size_t)size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001039 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001040 PyErr_SetString(PyExc_OverflowError,
1041 "repeated string is too long");
1042 return NULL;
1043 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001044 op = (PyStringObject *)
1045 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001046 if (op == NULL)
1047 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001048 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001049 op->ob_shash = -1;
1050 op->ob_sstate = SSTATE_NOT_INTERNED;
1051 op->ob_sval[size] = '\0';
1052 if (Py_SIZE(a) == 1 && n > 0) {
1053 memset(op->ob_sval, a->ob_sval[0] , n);
1054 return (PyObject *) op;
1055 }
1056 i = 0;
1057 if (i < size) {
1058 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1059 i = Py_SIZE(a);
1060 }
1061 while (i < size) {
1062 j = (i <= size-i) ? i : size-i;
1063 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1064 i += j;
1065 }
1066 return (PyObject *) op;
1067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1070
1071static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001072string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001073 register Py_ssize_t j)
1074 /* j -- may be negative! */
1075{
1076 if (i < 0)
1077 i = 0;
1078 if (j < 0)
1079 j = 0; /* Avoid signed/unsigned bug in next line */
1080 if (j > Py_SIZE(a))
1081 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001082 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001083 /* It's the same as a */
1084 Py_INCREF(a);
1085 return (PyObject *)a;
1086 }
1087 if (j < i)
1088 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001089 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001090}
1091
1092static int
1093string_contains(PyObject *str_obj, PyObject *sub_obj)
1094{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001095 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001096#ifdef Py_USING_UNICODE
1097 if (PyUnicode_Check(sub_obj))
1098 return PyUnicode_Contains(str_obj, sub_obj);
1099#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001100 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001101 PyErr_Format(PyExc_TypeError,
1102 "'in <string>' requires string as left operand, "
1103 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1104 return -1;
1105 }
1106 }
1107
1108 return stringlib_contains_obj(str_obj, sub_obj);
1109}
1110
1111static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001112string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001113{
1114 char pchar;
1115 PyObject *v;
1116 if (i < 0 || i >= Py_SIZE(a)) {
1117 PyErr_SetString(PyExc_IndexError, "string index out of range");
1118 return NULL;
1119 }
1120 pchar = a->ob_sval[i];
1121 v = (PyObject *)characters[pchar & UCHAR_MAX];
1122 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001123 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001124 else {
1125#ifdef COUNT_ALLOCS
1126 one_strings++;
1127#endif
1128 Py_INCREF(v);
1129 }
1130 return v;
1131}
1132
1133static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001134string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001135{
1136 int c;
1137 Py_ssize_t len_a, len_b;
1138 Py_ssize_t min_len;
1139 PyObject *result;
1140
1141 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001142 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001143 result = Py_NotImplemented;
1144 goto out;
1145 }
1146 if (a == b) {
1147 switch (op) {
1148 case Py_EQ:case Py_LE:case Py_GE:
1149 result = Py_True;
1150 goto out;
1151 case Py_NE:case Py_LT:case Py_GT:
1152 result = Py_False;
1153 goto out;
1154 }
1155 }
1156 if (op == Py_EQ) {
1157 /* Supporting Py_NE here as well does not save
1158 much time, since Py_NE is rarely used. */
1159 if (Py_SIZE(a) == Py_SIZE(b)
1160 && (a->ob_sval[0] == b->ob_sval[0]
1161 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1162 result = Py_True;
1163 } else {
1164 result = Py_False;
1165 }
1166 goto out;
1167 }
1168 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1169 min_len = (len_a < len_b) ? len_a : len_b;
1170 if (min_len > 0) {
1171 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1172 if (c==0)
1173 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1174 } else
1175 c = 0;
1176 if (c == 0)
1177 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1178 switch (op) {
1179 case Py_LT: c = c < 0; break;
1180 case Py_LE: c = c <= 0; break;
1181 case Py_EQ: assert(0); break; /* unreachable */
1182 case Py_NE: c = c != 0; break;
1183 case Py_GT: c = c > 0; break;
1184 case Py_GE: c = c >= 0; break;
1185 default:
1186 result = Py_NotImplemented;
1187 goto out;
1188 }
1189 result = c ? Py_True : Py_False;
1190 out:
1191 Py_INCREF(result);
1192 return result;
1193}
1194
1195int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001196_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001197{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001198 PyStringObject *a = (PyStringObject*) o1;
1199 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001200 return Py_SIZE(a) == Py_SIZE(b)
1201 && *a->ob_sval == *b->ob_sval
1202 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1203}
1204
1205static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001206string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001207{
1208 register Py_ssize_t len;
1209 register unsigned char *p;
1210 register long x;
1211
1212 if (a->ob_shash != -1)
1213 return a->ob_shash;
1214 len = Py_SIZE(a);
1215 p = (unsigned char *) a->ob_sval;
1216 x = *p << 7;
1217 while (--len >= 0)
1218 x = (1000003*x) ^ *p++;
1219 x ^= Py_SIZE(a);
1220 if (x == -1)
1221 x = -2;
1222 a->ob_shash = x;
1223 return x;
1224}
1225
1226static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001227string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001228{
1229 if (PyIndex_Check(item)) {
1230 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1231 if (i == -1 && PyErr_Occurred())
1232 return NULL;
1233 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001234 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001235 return string_item(self, i);
1236 }
1237 else if (PySlice_Check(item)) {
1238 Py_ssize_t start, stop, step, slicelength, cur, i;
1239 char* source_buf;
1240 char* result_buf;
1241 PyObject* result;
1242
1243 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001244 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001245 &start, &stop, &step, &slicelength) < 0) {
1246 return NULL;
1247 }
1248
1249 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001250 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001251 }
1252 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001253 slicelength == PyString_GET_SIZE(self) &&
1254 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001255 Py_INCREF(self);
1256 return (PyObject *)self;
1257 }
1258 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001259 return PyString_FromStringAndSize(
1260 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001261 slicelength);
1262 }
1263 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001264 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001265 result_buf = (char *)PyMem_Malloc(slicelength);
1266 if (result_buf == NULL)
1267 return PyErr_NoMemory();
1268
1269 for (cur = start, i = 0; i < slicelength;
1270 cur += step, i++) {
1271 result_buf[i] = source_buf[cur];
1272 }
1273
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001274 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001275 slicelength);
1276 PyMem_Free(result_buf);
1277 return result;
1278 }
1279 }
1280 else {
1281 PyErr_Format(PyExc_TypeError,
1282 "string indices must be integers, not %.200s",
1283 Py_TYPE(item)->tp_name);
1284 return NULL;
1285 }
1286}
1287
1288static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001289string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001290{
1291 if ( index != 0 ) {
1292 PyErr_SetString(PyExc_SystemError,
1293 "accessing non-existent string segment");
1294 return -1;
1295 }
1296 *ptr = (void *)self->ob_sval;
1297 return Py_SIZE(self);
1298}
1299
1300static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001301string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001302{
1303 PyErr_SetString(PyExc_TypeError,
1304 "Cannot use string as modifiable buffer");
1305 return -1;
1306}
1307
1308static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001309string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001310{
1311 if ( lenp )
1312 *lenp = Py_SIZE(self);
1313 return 1;
1314}
1315
1316static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001317string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001318{
1319 if ( index != 0 ) {
1320 PyErr_SetString(PyExc_SystemError,
1321 "accessing non-existent string segment");
1322 return -1;
1323 }
1324 *ptr = self->ob_sval;
1325 return Py_SIZE(self);
1326}
1327
1328static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001329string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001330{
1331 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou92a62402008-08-02 21:58:05 +00001332 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001333}
1334
1335static PySequenceMethods string_as_sequence = {
1336 (lenfunc)string_length, /*sq_length*/
1337 (binaryfunc)string_concat, /*sq_concat*/
1338 (ssizeargfunc)string_repeat, /*sq_repeat*/
1339 (ssizeargfunc)string_item, /*sq_item*/
1340 (ssizessizeargfunc)string_slice, /*sq_slice*/
1341 0, /*sq_ass_item*/
1342 0, /*sq_ass_slice*/
1343 (objobjproc)string_contains /*sq_contains*/
1344};
1345
1346static PyMappingMethods string_as_mapping = {
1347 (lenfunc)string_length,
1348 (binaryfunc)string_subscript,
1349 0,
1350};
1351
1352static PyBufferProcs string_as_buffer = {
1353 (readbufferproc)string_buffer_getreadbuf,
1354 (writebufferproc)string_buffer_getwritebuf,
1355 (segcountproc)string_buffer_getsegcount,
1356 (charbufferproc)string_buffer_getcharbuf,
1357 (getbufferproc)string_buffer_getbuffer,
1358 0, /* XXX */
1359};
1360
1361
1362
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Format strings, one per selector above.  Each starts with the three
   characters "|O:" followed by the method name. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
1371
Christian Heimes1a6387e2008-03-26 12:49:49 +00001372
/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly and the bytes in between with memcmp().

   Don't call if length < 2: the memcmp() count is length-2.

   Every argument is parenthesized in the expansion so that expression
   arguments (for example `buf + 1` as the target) expand correctly.
   Arguments are evaluated more than once, so they must be free of side
   effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1378
1379
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit`
   splits: maxsplit+1 (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so that expression arguments expand
   correctly; it may be evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001392
/* Create the substring data[left:right] and append it to `list`.

   Relies on the enclosing function providing the variables `str` and
   `list` and the label `onError`, which is jumped to when creating or
   appending the substring fails.  The reference held in `str` is released
   in either case once the append has been attempted.

   NOTE: this expands to several statements, not one; do not use it as the
   unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)                                 \
    str = PyString_FromStringAndSize((data) + (left),                   \
                                     (right) - (left));                 \
    if (str == NULL)                                                    \
        goto onError;                                                   \
    if (PyList_Append(list, str)) {                                     \
        Py_DECREF(str);                                                 \
        goto onError;                                                   \
    }                                                                   \
    else                                                                \
        Py_DECREF(str);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001404
/* Create the substring data[left:right] and add it to `list` as element
   number `count`, then increment `count`.

   While `count` is below MAX_PREALLOC the new string is stored straight
   into the list slot with PyList_SET_ITEM; after that it is appended with
   PyList_Append and the local reference is released.

   Relies on the enclosing function providing the variables `str`, `list`
   and `count` and the label `onError`, which is jumped to on failure. */
#define SPLIT_ADD(data, left, right) {                                  \
    str = PyString_FromStringAndSize((data) + (left),                   \
                                     (right) - (left));                 \
    if (str == NULL)                                                    \
        goto onError;                                                   \
    if (count >= MAX_PREALLOC) {                                        \
        if (PyList_Append(list, str)) {                                 \
            Py_DECREF(str);                                             \
            goto onError;                                               \
        }                                                               \
        Py_DECREF(str);                                                 \
    } else {                                                            \
        PyList_SET_ITEM(list, count, str);                              \
    }                                                                   \
    count++; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001421
/* Always force the list to the expected size: set the list's recorded size
   to `count`, a variable the enclosing function must provide. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
1424
/* Index-scanning helpers over the byte string `s`.  `i` must be a
   modifiable variable: it is advanced (SKIP_*) or moved back (RSKIP_*) in
   place.

   SKIP_SPACE / SKIP_NONSPACE move `i` forward past whitespace /
   non-whitespace bytes and stop at `len`.  RSKIP_SPACE / RSKIP_NONSPACE
   move `i` backward and stop once `i` drops below 0.

   Every argument is parenthesized in the expansion so that expression
   arguments expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001429
1430Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001431split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001432{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001433 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001434 Py_ssize_t i, j, count=0;
1435 PyObject *str;
1436 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001437
Christian Heimes44720832008-05-26 13:01:01 +00001438 if (list == NULL)
1439 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001440
Christian Heimes44720832008-05-26 13:01:01 +00001441 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001442
Christian Heimes44720832008-05-26 13:01:01 +00001443 while (maxsplit-- > 0) {
1444 SKIP_SPACE(s, i, len);
1445 if (i==len) break;
1446 j = i; i++;
1447 SKIP_NONSPACE(s, i, len);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001448 if (j == 0 && i == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001449 /* No whitespace in self, so just use it as list[0] */
1450 Py_INCREF(self);
1451 PyList_SET_ITEM(list, 0, (PyObject *)self);
1452 count++;
1453 break;
1454 }
1455 SPLIT_ADD(s, j, i);
1456 }
1457
1458 if (i < len) {
1459 /* Only occurs when maxsplit was reached */
1460 /* Skip any remaining whitespace and copy to end of string */
1461 SKIP_SPACE(s, i, len);
1462 if (i != len)
1463 SPLIT_ADD(s, i, len);
1464 }
1465 FIX_PREALLOC_SIZE(list);
1466 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001467 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001468 Py_DECREF(list);
1469 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001470}
1471
Christian Heimes1a6387e2008-03-26 12:49:49 +00001472Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001473split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001474{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001475 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001476 register Py_ssize_t i, j, count=0;
1477 PyObject *str;
1478 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001479
Christian Heimes44720832008-05-26 13:01:01 +00001480 if (list == NULL)
1481 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001482
Christian Heimes44720832008-05-26 13:01:01 +00001483 i = j = 0;
1484 while ((j < len) && (maxcount-- > 0)) {
1485 for(; j<len; j++) {
1486 /* I found that using memchr makes no difference */
1487 if (s[j] == ch) {
1488 SPLIT_ADD(s, i, j);
1489 i = j = j + 1;
1490 break;
1491 }
1492 }
1493 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001494 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001495 /* ch not in self, so just use self as list[0] */
1496 Py_INCREF(self);
1497 PyList_SET_ITEM(list, 0, (PyObject *)self);
1498 count++;
1499 }
1500 else if (i <= len) {
1501 SPLIT_ADD(s, i, len);
1502 }
1503 FIX_PREALLOC_SIZE(list);
1504 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001505
1506 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001507 Py_DECREF(list);
1508 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001509}
1510
1511PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001512"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001513\n\
Christian Heimes44720832008-05-26 13:01:01 +00001514Return a list of the words in the string S, using sep as the\n\
1515delimiter string. If maxsplit is given, at most maxsplit\n\
1516splits are done. If sep is not specified or is None, any\n\
1517whitespace string is a separator and empty strings are removed\n\
1518from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001519
1520static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001521string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001522{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001523 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001524 Py_ssize_t maxsplit = -1, count=0;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001525 const char *s = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00001526 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001527#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001528 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001529#endif
1530
Christian Heimes44720832008-05-26 13:01:01 +00001531 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1532 return NULL;
1533 if (maxsplit < 0)
1534 maxsplit = PY_SSIZE_T_MAX;
1535 if (subobj == Py_None)
1536 return split_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001537 if (PyString_Check(subobj)) {
1538 sub = PyString_AS_STRING(subobj);
1539 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001540 }
1541#ifdef Py_USING_UNICODE
1542 else if (PyUnicode_Check(subobj))
1543 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1544#endif
1545 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1546 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001547
Christian Heimes44720832008-05-26 13:01:01 +00001548 if (n == 0) {
1549 PyErr_SetString(PyExc_ValueError, "empty separator");
1550 return NULL;
1551 }
1552 else if (n == 1)
1553 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001554
Christian Heimes44720832008-05-26 13:01:01 +00001555 list = PyList_New(PREALLOC_SIZE(maxsplit));
1556 if (list == NULL)
1557 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001558
1559#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001560 i = j = 0;
1561 while (maxsplit-- > 0) {
1562 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1563 if (pos < 0)
1564 break;
1565 j = i+pos;
1566 SPLIT_ADD(s, i, j);
1567 i = j + n;
1568 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001569#else
Christian Heimes44720832008-05-26 13:01:01 +00001570 i = j = 0;
1571 while ((j+n <= len) && (maxsplit-- > 0)) {
1572 for (; j+n <= len; j++) {
1573 if (Py_STRING_MATCH(s, j, sub, n)) {
1574 SPLIT_ADD(s, i, j);
1575 i = j = j + n;
1576 break;
1577 }
1578 }
1579 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001580#endif
Christian Heimes44720832008-05-26 13:01:01 +00001581 SPLIT_ADD(s, i, len);
1582 FIX_PREALLOC_SIZE(list);
1583 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001584
Christian Heimes44720832008-05-26 13:01:01 +00001585 onError:
1586 Py_DECREF(list);
1587 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001588}
1589
1590PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001591"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001592\n\
Christian Heimes44720832008-05-26 13:01:01 +00001593Searches for the separator sep in S, and returns the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001594the separator itself, and the part after it. If the separator is not\n\
Christian Heimes44720832008-05-26 13:01:01 +00001595found, returns S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001596
1597static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001598string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001599{
Christian Heimes44720832008-05-26 13:01:01 +00001600 const char *sep;
1601 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001602
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001603 if (PyString_Check(sep_obj)) {
1604 sep = PyString_AS_STRING(sep_obj);
1605 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001606 }
1607#ifdef Py_USING_UNICODE
1608 else if (PyUnicode_Check(sep_obj))
1609 return PyUnicode_Partition((PyObject *) self, sep_obj);
1610#endif
1611 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1612 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001613
Christian Heimes44720832008-05-26 13:01:01 +00001614 return stringlib_partition(
1615 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001616 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001617 sep_obj, sep, sep_len
1618 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001619}
1620
1621PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001622"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001623\n\
Christian Heimes44720832008-05-26 13:01:01 +00001624Searches for the separator sep in S, starting at the end of S, and returns\n\
1625the part before it, the separator itself, and the part after it. If the\n\
1626separator is not found, returns two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001627
1628static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001629string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001630{
Christian Heimes44720832008-05-26 13:01:01 +00001631 const char *sep;
1632 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001633
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001634 if (PyString_Check(sep_obj)) {
1635 sep = PyString_AS_STRING(sep_obj);
1636 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001637 }
1638#ifdef Py_USING_UNICODE
1639 else if (PyUnicode_Check(sep_obj))
1640 return PyUnicode_Partition((PyObject *) self, sep_obj);
1641#endif
1642 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1643 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001644
Christian Heimes44720832008-05-26 13:01:01 +00001645 return stringlib_rpartition(
1646 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001647 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001648 sep_obj, sep, sep_len
1649 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001650}
1651
1652Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001653rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001654{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001655 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001656 Py_ssize_t i, j, count=0;
1657 PyObject *str;
1658 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001659
Christian Heimes44720832008-05-26 13:01:01 +00001660 if (list == NULL)
1661 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001662
Christian Heimes44720832008-05-26 13:01:01 +00001663 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001664
Christian Heimes44720832008-05-26 13:01:01 +00001665 while (maxsplit-- > 0) {
1666 RSKIP_SPACE(s, i);
1667 if (i<0) break;
1668 j = i; i--;
1669 RSKIP_NONSPACE(s, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001670 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001671 /* No whitespace in self, so just use it as list[0] */
1672 Py_INCREF(self);
1673 PyList_SET_ITEM(list, 0, (PyObject *)self);
1674 count++;
1675 break;
1676 }
1677 SPLIT_ADD(s, i + 1, j + 1);
1678 }
1679 if (i >= 0) {
1680 /* Only occurs when maxsplit was reached */
1681 /* Skip any remaining whitespace and copy to beginning of string */
1682 RSKIP_SPACE(s, i);
1683 if (i >= 0)
1684 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001685
Christian Heimes44720832008-05-26 13:01:01 +00001686 }
1687 FIX_PREALLOC_SIZE(list);
1688 if (PyList_Reverse(list) < 0)
1689 goto onError;
1690 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001691 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001692 Py_DECREF(list);
1693 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001694}
1695
1696Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001697rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001698{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001699 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001700 register Py_ssize_t i, j, count=0;
1701 PyObject *str;
1702 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001703
Christian Heimes44720832008-05-26 13:01:01 +00001704 if (list == NULL)
1705 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001706
Christian Heimes44720832008-05-26 13:01:01 +00001707 i = j = len - 1;
1708 while ((i >= 0) && (maxcount-- > 0)) {
1709 for (; i >= 0; i--) {
1710 if (s[i] == ch) {
1711 SPLIT_ADD(s, i + 1, j + 1);
1712 j = i = i - 1;
1713 break;
1714 }
1715 }
1716 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001717 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001718 /* ch not in self, so just use self as list[0] */
1719 Py_INCREF(self);
1720 PyList_SET_ITEM(list, 0, (PyObject *)self);
1721 count++;
1722 }
1723 else if (j >= -1) {
1724 SPLIT_ADD(s, 0, j + 1);
1725 }
1726 FIX_PREALLOC_SIZE(list);
1727 if (PyList_Reverse(list) < 0)
1728 goto onError;
1729 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001730
Christian Heimes44720832008-05-26 13:01:01 +00001731 onError:
1732 Py_DECREF(list);
1733 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001734}
1735
1736PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001737"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001738\n\
Christian Heimes44720832008-05-26 13:01:01 +00001739Return a list of the words in the string S, using sep as the\n\
1740delimiter string, starting at the end of the string and working\n\
1741to the front. If maxsplit is given, at most maxsplit splits are\n\
1742done. If sep is not specified or is None, any whitespace string\n\
1743is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001744
1745static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001746string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001747{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001748 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001749 Py_ssize_t maxsplit = -1, count=0;
1750 const char *s, *sub;
1751 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001752
Christian Heimes44720832008-05-26 13:01:01 +00001753 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1754 return NULL;
1755 if (maxsplit < 0)
1756 maxsplit = PY_SSIZE_T_MAX;
1757 if (subobj == Py_None)
1758 return rsplit_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001759 if (PyString_Check(subobj)) {
1760 sub = PyString_AS_STRING(subobj);
1761 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001762 }
1763#ifdef Py_USING_UNICODE
1764 else if (PyUnicode_Check(subobj))
1765 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1766#endif
1767 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1768 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001769
Christian Heimes44720832008-05-26 13:01:01 +00001770 if (n == 0) {
1771 PyErr_SetString(PyExc_ValueError, "empty separator");
1772 return NULL;
1773 }
1774 else if (n == 1)
1775 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001776
Christian Heimes44720832008-05-26 13:01:01 +00001777 list = PyList_New(PREALLOC_SIZE(maxsplit));
1778 if (list == NULL)
1779 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001780
Christian Heimes44720832008-05-26 13:01:01 +00001781 j = len;
1782 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001783
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001784 s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001785 while ( (i >= 0) && (maxsplit-- > 0) ) {
1786 for (; i>=0; i--) {
1787 if (Py_STRING_MATCH(s, i, sub, n)) {
1788 SPLIT_ADD(s, i + n, j);
1789 j = i;
1790 i -= n;
1791 break;
1792 }
1793 }
1794 }
1795 SPLIT_ADD(s, 0, j);
1796 FIX_PREALLOC_SIZE(list);
1797 if (PyList_Reverse(list) < 0)
1798 goto onError;
1799 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001800
1801onError:
Christian Heimes44720832008-05-26 13:01:01 +00001802 Py_DECREF(list);
1803 return NULL;
1804}
1805
1806
1807PyDoc_STRVAR(join__doc__,
1808"S.join(sequence) -> string\n\
1809\n\
1810Return a string which is the concatenation of the strings in the\n\
1811sequence. The separator between elements is S.");
1812
1813static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001814string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001815{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001816 char *sep = PyString_AS_STRING(self);
1817 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001818 PyObject *res = NULL;
1819 char *p;
1820 Py_ssize_t seqlen = 0;
1821 size_t sz = 0;
1822 Py_ssize_t i;
1823 PyObject *seq, *item;
1824
1825 seq = PySequence_Fast(orig, "");
1826 if (seq == NULL) {
1827 return NULL;
1828 }
1829
1830 seqlen = PySequence_Size(seq);
1831 if (seqlen == 0) {
1832 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001833 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001834 }
1835 if (seqlen == 1) {
1836 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001837 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001838 Py_INCREF(item);
1839 Py_DECREF(seq);
1840 return item;
1841 }
1842 }
1843
1844 /* There are at least two things to join, or else we have a subclass
1845 * of the builtin types in the sequence.
1846 * Do a pre-pass to figure out the total amount of space we'll
1847 * need (sz), see whether any argument is absurd, and defer to
1848 * the Unicode join if appropriate.
1849 */
1850 for (i = 0; i < seqlen; i++) {
1851 const size_t old_sz = sz;
1852 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001853 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001854#ifdef Py_USING_UNICODE
1855 if (PyUnicode_Check(item)) {
1856 /* Defer to Unicode join.
1857 * CAUTION: There's no gurantee that the
1858 * original sequence can be iterated over
1859 * again, so we must pass seq here.
1860 */
1861 PyObject *result;
1862 result = PyUnicode_Join((PyObject *)self, seq);
1863 Py_DECREF(seq);
1864 return result;
1865 }
1866#endif
1867 PyErr_Format(PyExc_TypeError,
1868 "sequence item %zd: expected string,"
1869 " %.80s found",
1870 i, Py_TYPE(item)->tp_name);
1871 Py_DECREF(seq);
1872 return NULL;
1873 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001874 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001875 if (i != 0)
1876 sz += seplen;
1877 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1878 PyErr_SetString(PyExc_OverflowError,
1879 "join() result is too long for a Python string");
1880 Py_DECREF(seq);
1881 return NULL;
1882 }
1883 }
1884
1885 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001886 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001887 if (res == NULL) {
1888 Py_DECREF(seq);
1889 return NULL;
1890 }
1891
1892 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001893 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001894 for (i = 0; i < seqlen; ++i) {
1895 size_t n;
1896 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001897 n = PyString_GET_SIZE(item);
1898 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001899 p += n;
1900 if (i < seqlen - 1) {
1901 Py_MEMCPY(p, sep, seplen);
1902 p += seplen;
1903 }
1904 }
1905
1906 Py_DECREF(seq);
1907 return res;
1908}
1909
1910PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001911_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001912{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001913 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001914 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001915 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001916}
1917
1918Py_LOCAL_INLINE(void)
1919string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1920{
1921 if (*end > len)
1922 *end = len;
1923 else if (*end < 0)
1924 *end += len;
1925 if (*end < 0)
1926 *end = 0;
1927 if (*start < 0)
1928 *start += len;
1929 if (*start < 0)
1930 *start = 0;
1931}
1932
1933Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001934string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001935{
1936 PyObject *subobj;
1937 const char *sub;
1938 Py_ssize_t sub_len;
1939 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1940 PyObject *obj_start=Py_None, *obj_end=Py_None;
1941
1942 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1943 &obj_start, &obj_end))
1944 return -2;
1945 /* To support None in "start" and "end" arguments, meaning
1946 the same as if they were not passed.
1947 */
1948 if (obj_start != Py_None)
1949 if (!_PyEval_SliceIndex(obj_start, &start))
1950 return -2;
1951 if (obj_end != Py_None)
1952 if (!_PyEval_SliceIndex(obj_end, &end))
1953 return -2;
1954
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001955 if (PyString_Check(subobj)) {
1956 sub = PyString_AS_STRING(subobj);
1957 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001958 }
1959#ifdef Py_USING_UNICODE
1960 else if (PyUnicode_Check(subobj))
1961 return PyUnicode_Find(
1962 (PyObject *)self, subobj, start, end, dir);
1963#endif
1964 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1965 /* XXX - the "expected a character buffer object" is pretty
1966 confusing for a non-expert. remap to something else ? */
1967 return -2;
1968
1969 if (dir > 0)
1970 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001971 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001972 sub, sub_len, start, end);
1973 else
1974 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001975 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001976 sub, sub_len, start, end);
1977}
1978
1979
1980PyDoc_STRVAR(find__doc__,
1981"S.find(sub [,start [,end]]) -> int\n\
1982\n\
1983Return the lowest index in S where substring sub is found,\n\
1984such that sub is contained within s[start:end]. Optional\n\
1985arguments start and end are interpreted as in slice notation.\n\
1986\n\
1987Return -1 on failure.");
1988
1989static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001990string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001991{
1992 Py_ssize_t result = string_find_internal(self, args, +1);
1993 if (result == -2)
1994 return NULL;
1995 return PyInt_FromSsize_t(result);
1996}
1997
1998
1999PyDoc_STRVAR(index__doc__,
2000"S.index(sub [,start [,end]]) -> int\n\
2001\n\
2002Like S.find() but raise ValueError when the substring is not found.");
2003
2004static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002005string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002006{
2007 Py_ssize_t result = string_find_internal(self, args, +1);
2008 if (result == -2)
2009 return NULL;
2010 if (result == -1) {
2011 PyErr_SetString(PyExc_ValueError,
2012 "substring not found");
2013 return NULL;
2014 }
2015 return PyInt_FromSsize_t(result);
2016}
2017
2018
2019PyDoc_STRVAR(rfind__doc__,
2020"S.rfind(sub [,start [,end]]) -> int\n\
2021\n\
2022Return the highest index in S where substring sub is found,\n\
2023such that sub is contained within s[start:end]. Optional\n\
2024arguments start and end are interpreted as in slice notation.\n\
2025\n\
2026Return -1 on failure.");
2027
2028static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002029string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002030{
2031 Py_ssize_t result = string_find_internal(self, args, -1);
2032 if (result == -2)
2033 return NULL;
2034 return PyInt_FromSsize_t(result);
2035}
2036
2037
2038PyDoc_STRVAR(rindex__doc__,
2039"S.rindex(sub [,start [,end]]) -> int\n\
2040\n\
2041Like S.rfind() but raise ValueError when the substring is not found.");
2042
2043static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002044string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002045{
2046 Py_ssize_t result = string_find_internal(self, args, -1);
2047 if (result == -2)
2048 return NULL;
2049 if (result == -1) {
2050 PyErr_SetString(PyExc_ValueError,
2051 "substring not found");
2052 return NULL;
2053 }
2054 return PyInt_FromSsize_t(result);
2055}
2056
2057
2058Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002059do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002060{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002061 char *s = PyString_AS_STRING(self);
2062 Py_ssize_t len = PyString_GET_SIZE(self);
2063 char *sep = PyString_AS_STRING(sepobj);
2064 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00002065 Py_ssize_t i, j;
2066
2067 i = 0;
2068 if (striptype != RIGHTSTRIP) {
2069 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2070 i++;
2071 }
2072 }
2073
2074 j = len;
2075 if (striptype != LEFTSTRIP) {
2076 do {
2077 j--;
2078 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2079 j++;
2080 }
2081
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002082 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002083 Py_INCREF(self);
2084 return (PyObject*)self;
2085 }
2086 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002087 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002088}
2089
2090
2091Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002092do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002093{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002094 char *s = PyString_AS_STRING(self);
2095 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002096
2097 i = 0;
2098 if (striptype != RIGHTSTRIP) {
2099 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2100 i++;
2101 }
2102 }
2103
2104 j = len;
2105 if (striptype != LEFTSTRIP) {
2106 do {
2107 j--;
2108 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2109 j++;
2110 }
2111
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002112 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002113 Py_INCREF(self);
2114 return (PyObject*)self;
2115 }
2116 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002117 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002118}
2119
2120
2121Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002122do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002123{
2124 PyObject *sep = NULL;
2125
2126 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2127 return NULL;
2128
2129 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002130 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00002131 return do_xstrip(self, striptype, sep);
2132#ifdef Py_USING_UNICODE
2133 else if (PyUnicode_Check(sep)) {
2134 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2135 PyObject *res;
2136 if (uniself==NULL)
2137 return NULL;
2138 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2139 striptype, sep);
2140 Py_DECREF(uniself);
2141 return res;
2142 }
2143#endif
2144 PyErr_Format(PyExc_TypeError,
2145#ifdef Py_USING_UNICODE
2146 "%s arg must be None, str or unicode",
2147#else
2148 "%s arg must be None or str",
2149#endif
2150 STRIPNAME(striptype));
2151 return NULL;
2152 }
2153
2154 return do_strip(self, striptype);
2155}
2156
2157
2158PyDoc_STRVAR(strip__doc__,
2159"S.strip([chars]) -> string or unicode\n\
2160\n\
2161Return a copy of the string S with leading and trailing\n\
2162whitespace removed.\n\
2163If chars is given and not None, remove characters in chars instead.\n\
2164If chars is unicode, S will be converted to unicode before stripping");
2165
2166static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002167string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002168{
2169 if (PyTuple_GET_SIZE(args) == 0)
2170 return do_strip(self, BOTHSTRIP); /* Common case */
2171 else
2172 return do_argstrip(self, BOTHSTRIP, args);
2173}
2174
2175
2176PyDoc_STRVAR(lstrip__doc__,
2177"S.lstrip([chars]) -> string or unicode\n\
2178\n\
2179Return a copy of the string S with leading whitespace removed.\n\
2180If chars is given and not None, remove characters in chars instead.\n\
2181If chars is unicode, S will be converted to unicode before stripping");
2182
2183static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002184string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002185{
2186 if (PyTuple_GET_SIZE(args) == 0)
2187 return do_strip(self, LEFTSTRIP); /* Common case */
2188 else
2189 return do_argstrip(self, LEFTSTRIP, args);
2190}
2191
2192
2193PyDoc_STRVAR(rstrip__doc__,
2194"S.rstrip([chars]) -> string or unicode\n\
2195\n\
2196Return a copy of the string S with trailing whitespace removed.\n\
2197If chars is given and not None, remove characters in chars instead.\n\
2198If chars is unicode, S will be converted to unicode before stripping");
2199
2200static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002201string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002202{
2203 if (PyTuple_GET_SIZE(args) == 0)
2204 return do_strip(self, RIGHTSTRIP); /* Common case */
2205 else
2206 return do_argstrip(self, RIGHTSTRIP, args);
2207}
2208
2209
2210PyDoc_STRVAR(lower__doc__,
2211"S.lower() -> string\n\
2212\n\
2213Return a copy of the string S converted to lowercase.");
2214
2215/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2216#ifndef _tolower
2217#define _tolower tolower
2218#endif
2219
2220static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002221string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002222{
2223 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002224 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002225 PyObject *newobj;
2226
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002227 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002228 if (!newobj)
2229 return NULL;
2230
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002231 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002232
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002233 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002234
2235 for (i = 0; i < n; i++) {
2236 int c = Py_CHARMASK(s[i]);
2237 if (isupper(c))
2238 s[i] = _tolower(c);
2239 }
2240
2241 return newobj;
2242}
2243
2244PyDoc_STRVAR(upper__doc__,
2245"S.upper() -> string\n\
2246\n\
2247Return a copy of the string S converted to uppercase.");
2248
2249#ifndef _toupper
2250#define _toupper toupper
2251#endif
2252
2253static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002254string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002255{
2256 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002257 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002258 PyObject *newobj;
2259
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002260 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002261 if (!newobj)
2262 return NULL;
2263
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002264 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002265
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002266 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002267
2268 for (i = 0; i < n; i++) {
2269 int c = Py_CHARMASK(s[i]);
2270 if (islower(c))
2271 s[i] = _toupper(c);
2272 }
2273
2274 return newobj;
2275}
2276
2277PyDoc_STRVAR(title__doc__,
2278"S.title() -> string\n\
2279\n\
2280Return a titlecased version of S, i.e. words start with uppercase\n\
2281characters, all remaining cased characters have lowercase.");
2282
2283static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002284string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002285{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002286 char *s = PyString_AS_STRING(self), *s_new;
2287 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002288 int previous_is_cased = 0;
2289 PyObject *newobj;
2290
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002291 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002292 if (newobj == NULL)
2293 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002294 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002295 for (i = 0; i < n; i++) {
2296 int c = Py_CHARMASK(*s++);
2297 if (islower(c)) {
2298 if (!previous_is_cased)
2299 c = toupper(c);
2300 previous_is_cased = 1;
2301 } else if (isupper(c)) {
2302 if (previous_is_cased)
2303 c = tolower(c);
2304 previous_is_cased = 1;
2305 } else
2306 previous_is_cased = 0;
2307 *s_new++ = c;
2308 }
2309 return newobj;
2310}
2311
2312PyDoc_STRVAR(capitalize__doc__,
2313"S.capitalize() -> string\n\
2314\n\
2315Return a copy of the string S with only its first character\n\
2316capitalized.");
2317
2318static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002319string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002320{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002321 char *s = PyString_AS_STRING(self), *s_new;
2322 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002323 PyObject *newobj;
2324
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002325 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002326 if (newobj == NULL)
2327 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002328 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002329 if (0 < n) {
2330 int c = Py_CHARMASK(*s++);
2331 if (islower(c))
2332 *s_new = toupper(c);
2333 else
2334 *s_new = c;
2335 s_new++;
2336 }
2337 for (i = 1; i < n; i++) {
2338 int c = Py_CHARMASK(*s++);
2339 if (isupper(c))
2340 *s_new = tolower(c);
2341 else
2342 *s_new = c;
2343 s_new++;
2344 }
2345 return newobj;
2346}
2347
2348
2349PyDoc_STRVAR(count__doc__,
2350"S.count(sub[, start[, end]]) -> int\n\
2351\n\
2352Return the number of non-overlapping occurrences of substring sub in\n\
2353string S[start:end]. Optional arguments start and end are interpreted\n\
2354as in slice notation.");
2355
2356static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002357string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002358{
2359 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002360 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002361 Py_ssize_t sub_len;
2362 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2363
2364 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2365 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2366 return NULL;
2367
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002368 if (PyString_Check(sub_obj)) {
2369 sub = PyString_AS_STRING(sub_obj);
2370 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002371 }
2372#ifdef Py_USING_UNICODE
2373 else if (PyUnicode_Check(sub_obj)) {
2374 Py_ssize_t count;
2375 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2376 if (count == -1)
2377 return NULL;
2378 else
2379 return PyInt_FromSsize_t(count);
2380 }
2381#endif
2382 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2383 return NULL;
2384
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002385 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002386
2387 return PyInt_FromSsize_t(
2388 stringlib_count(str + start, end - start, sub, sub_len)
2389 );
2390}
2391
2392PyDoc_STRVAR(swapcase__doc__,
2393"S.swapcase() -> string\n\
2394\n\
2395Return a copy of the string S with uppercase characters\n\
2396converted to lowercase and vice versa.");
2397
2398static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002399string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002400{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002401 char *s = PyString_AS_STRING(self), *s_new;
2402 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002403 PyObject *newobj;
2404
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002405 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002406 if (newobj == NULL)
2407 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002408 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002409 for (i = 0; i < n; i++) {
2410 int c = Py_CHARMASK(*s++);
2411 if (islower(c)) {
2412 *s_new = toupper(c);
2413 }
2414 else if (isupper(c)) {
2415 *s_new = tolower(c);
2416 }
2417 else
2418 *s_new = c;
2419 s_new++;
2420 }
2421 return newobj;
2422}
2423
2424
2425PyDoc_STRVAR(translate__doc__,
2426"S.translate(table [,deletechars]) -> string\n\
2427\n\
2428Return a copy of the string S, where all characters occurring\n\
2429in the optional argument deletechars are removed, and the\n\
2430remaining characters have been mapped through the given\n\
2431translation table, which must be a string of length 256.");
2432
2433static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002434string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002435{
2436 register char *input, *output;
2437 const char *table;
2438 register Py_ssize_t i, c, changed = 0;
2439 PyObject *input_obj = (PyObject*)self;
2440 const char *output_start, *del_table=NULL;
2441 Py_ssize_t inlen, tablen, dellen = 0;
2442 PyObject *result;
2443 int trans_table[256];
2444 PyObject *tableobj, *delobj = NULL;
2445
2446 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2447 &tableobj, &delobj))
2448 return NULL;
2449
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002450 if (PyString_Check(tableobj)) {
2451 table = PyString_AS_STRING(tableobj);
2452 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002453 }
2454 else if (tableobj == Py_None) {
2455 table = NULL;
2456 tablen = 256;
2457 }
2458#ifdef Py_USING_UNICODE
2459 else if (PyUnicode_Check(tableobj)) {
2460 /* Unicode .translate() does not support the deletechars
2461 parameter; instead a mapping to None will cause characters
2462 to be deleted. */
2463 if (delobj != NULL) {
2464 PyErr_SetString(PyExc_TypeError,
2465 "deletions are implemented differently for unicode");
2466 return NULL;
2467 }
2468 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2469 }
2470#endif
2471 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2472 return NULL;
2473
2474 if (tablen != 256) {
2475 PyErr_SetString(PyExc_ValueError,
2476 "translation table must be 256 characters long");
2477 return NULL;
2478 }
2479
2480 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002481 if (PyString_Check(delobj)) {
2482 del_table = PyString_AS_STRING(delobj);
2483 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002484 }
2485#ifdef Py_USING_UNICODE
2486 else if (PyUnicode_Check(delobj)) {
2487 PyErr_SetString(PyExc_TypeError,
2488 "deletions are implemented differently for unicode");
2489 return NULL;
2490 }
2491#endif
2492 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2493 return NULL;
2494 }
2495 else {
2496 del_table = NULL;
2497 dellen = 0;
2498 }
2499
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002500 inlen = PyString_GET_SIZE(input_obj);
2501 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002502 if (result == NULL)
2503 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002504 output_start = output = PyString_AsString(result);
2505 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002506
2507 if (dellen == 0 && table != NULL) {
2508 /* If no deletions are required, use faster code */
2509 for (i = inlen; --i >= 0; ) {
2510 c = Py_CHARMASK(*input++);
2511 if (Py_CHARMASK((*output++ = table[c])) != c)
2512 changed = 1;
2513 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002514 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002515 return result;
2516 Py_DECREF(result);
2517 Py_INCREF(input_obj);
2518 return input_obj;
2519 }
2520
2521 if (table == NULL) {
2522 for (i = 0; i < 256; i++)
2523 trans_table[i] = Py_CHARMASK(i);
2524 } else {
2525 for (i = 0; i < 256; i++)
2526 trans_table[i] = Py_CHARMASK(table[i]);
2527 }
2528
2529 for (i = 0; i < dellen; i++)
2530 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2531
2532 for (i = inlen; --i >= 0; ) {
2533 c = Py_CHARMASK(*input++);
2534 if (trans_table[c] != -1)
2535 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2536 continue;
2537 changed = 1;
2538 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002539 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002540 Py_DECREF(result);
2541 Py_INCREF(input_obj);
2542 return input_obj;
2543 }
2544 /* Fix the size of the resulting string */
2545 if (inlen > 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002546 _PyString_Resize(&result, output - output_start);
Christian Heimes44720832008-05-26 13:01:01 +00002547 return result;
2548}
2549
2550
/* Search directions passed as the `direction` argument of the substring
   helpers in this file. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Look for the byte `c` in the `target_len` bytes starting at `target`.
   Evaluates to a char* pointing at the first occurrence, or NULL when the
   byte is not present.  Every argument and the negative constant above are
   parenthesized so the macros expand safely inside larger expressions. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2558
2559/* String ops must return a string. */
2560/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002561Py_LOCAL(PyStringObject *)
2562return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002563{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002564 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002565 Py_INCREF(self);
2566 return self;
2567 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002568 return (PyStringObject *)PyString_FromStringAndSize(
2569 PyString_AS_STRING(self),
2570 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002571}
2572
2573Py_LOCAL_INLINE(Py_ssize_t)
2574countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2575{
2576 Py_ssize_t count=0;
2577 const char *start=target;
2578 const char *end=target+target_len;
2579
2580 while ( (start=findchar(start, end-start, c)) != NULL ) {
2581 count++;
2582 if (count >= maxcount)
2583 break;
2584 start += 1;
2585 }
2586 return count;
2587}
2588
2589Py_LOCAL(Py_ssize_t)
2590findstring(const char *target, Py_ssize_t target_len,
2591 const char *pattern, Py_ssize_t pattern_len,
2592 Py_ssize_t start,
2593 Py_ssize_t end,
2594 int direction)
2595{
2596 if (start < 0) {
2597 start += target_len;
2598 if (start < 0)
2599 start = 0;
2600 }
2601 if (end > target_len) {
2602 end = target_len;
2603 } else if (end < 0) {
2604 end += target_len;
2605 if (end < 0)
2606 end = 0;
2607 }
2608
2609 /* zero-length substrings always match at the first attempt */
2610 if (pattern_len == 0)
2611 return (direction > 0) ? start : end;
2612
2613 end -= pattern_len;
2614
2615 if (direction < 0) {
2616 for (; end >= start; end--)
2617 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2618 return end;
2619 } else {
2620 for (; start <= end; start++)
2621 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2622 return start;
2623 }
2624 return -1;
2625}
2626
2627Py_LOCAL_INLINE(Py_ssize_t)
2628countstring(const char *target, Py_ssize_t target_len,
2629 const char *pattern, Py_ssize_t pattern_len,
2630 Py_ssize_t start,
2631 Py_ssize_t end,
2632 int direction, Py_ssize_t maxcount)
2633{
2634 Py_ssize_t count=0;
2635
2636 if (start < 0) {
2637 start += target_len;
2638 if (start < 0)
2639 start = 0;
2640 }
2641 if (end > target_len) {
2642 end = target_len;
2643 } else if (end < 0) {
2644 end += target_len;
2645 if (end < 0)
2646 end = 0;
2647 }
2648
2649 /* zero-length substrings match everywhere */
2650 if (pattern_len == 0 || maxcount == 0) {
2651 if (target_len+1 < maxcount)
2652 return target_len+1;
2653 return maxcount;
2654 }
2655
2656 end -= pattern_len;
2657 if (direction < 0) {
2658 for (; (end >= start); end--)
2659 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2660 count++;
2661 if (--maxcount <= 0) break;
2662 end -= pattern_len-1;
2663 }
2664 } else {
2665 for (; (start <= end); start++)
2666 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2667 count++;
2668 if (--maxcount <= 0)
2669 break;
2670 start += pattern_len-1;
2671 }
2672 }
2673 return count;
2674}
2675
2676
2677/* Algorithms for different cases of string replacement */
2678
2679/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002680Py_LOCAL(PyStringObject *)
2681replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002682 const char *to_s, Py_ssize_t to_len,
2683 Py_ssize_t maxcount)
2684{
2685 char *self_s, *result_s;
2686 Py_ssize_t self_len, result_len;
2687 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002688 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002689
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002690 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002691
2692 /* 1 at the end plus 1 after every character */
2693 count = self_len+1;
2694 if (maxcount < count)
2695 count = maxcount;
2696
2697 /* Check for overflow */
2698 /* result_len = count * to_len + self_len; */
2699 product = count * to_len;
2700 if (product / to_len != count) {
2701 PyErr_SetString(PyExc_OverflowError,
2702 "replace string is too long");
2703 return NULL;
2704 }
2705 result_len = product + self_len;
2706 if (result_len < 0) {
2707 PyErr_SetString(PyExc_OverflowError,
2708 "replace string is too long");
2709 return NULL;
2710 }
2711
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002712 if (! (result = (PyStringObject *)
2713 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002714 return NULL;
2715
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002716 self_s = PyString_AS_STRING(self);
2717 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002718
2719 /* TODO: special case single character, which doesn't need memcpy */
2720
2721 /* Lay the first one down (guaranteed this will occur) */
2722 Py_MEMCPY(result_s, to_s, to_len);
2723 result_s += to_len;
2724 count -= 1;
2725
2726 for (i=0; i<count; i++) {
2727 *result_s++ = *self_s++;
2728 Py_MEMCPY(result_s, to_s, to_len);
2729 result_s += to_len;
2730 }
2731
2732 /* Copy the rest of the original string */
2733 Py_MEMCPY(result_s, self_s, self_len-i);
2734
2735 return result;
2736}
2737
2738/* Special case for deleting a single character */
2739/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002740Py_LOCAL(PyStringObject *)
2741replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002742 char from_c, Py_ssize_t maxcount)
2743{
2744 char *self_s, *result_s;
2745 char *start, *next, *end;
2746 Py_ssize_t self_len, result_len;
2747 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002748 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002749
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002750 self_len = PyString_GET_SIZE(self);
2751 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002752
2753 count = countchar(self_s, self_len, from_c, maxcount);
2754 if (count == 0) {
2755 return return_self(self);
2756 }
2757
2758 result_len = self_len - count; /* from_len == 1 */
2759 assert(result_len>=0);
2760
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002761 if ( (result = (PyStringObject *)
2762 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002763 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002764 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002765
2766 start = self_s;
2767 end = self_s + self_len;
2768 while (count-- > 0) {
2769 next = findchar(start, end-start, from_c);
2770 if (next == NULL)
2771 break;
2772 Py_MEMCPY(result_s, start, next-start);
2773 result_s += (next-start);
2774 start = next+1;
2775 }
2776 Py_MEMCPY(result_s, start, end-start);
2777
2778 return result;
2779}
2780
2781/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2782
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002783Py_LOCAL(PyStringObject *)
2784replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002785 const char *from_s, Py_ssize_t from_len,
2786 Py_ssize_t maxcount) {
2787 char *self_s, *result_s;
2788 char *start, *next, *end;
2789 Py_ssize_t self_len, result_len;
2790 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002791 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002792
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002793 self_len = PyString_GET_SIZE(self);
2794 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002795
2796 count = countstring(self_s, self_len,
2797 from_s, from_len,
2798 0, self_len, 1,
2799 maxcount);
2800
2801 if (count == 0) {
2802 /* no matches */
2803 return return_self(self);
2804 }
2805
2806 result_len = self_len - (count * from_len);
2807 assert (result_len>=0);
2808
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002809 if ( (result = (PyStringObject *)
2810 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002811 return NULL;
2812
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002813 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002814
2815 start = self_s;
2816 end = self_s + self_len;
2817 while (count-- > 0) {
2818 offset = findstring(start, end-start,
2819 from_s, from_len,
2820 0, end-start, FORWARD);
2821 if (offset == -1)
2822 break;
2823 next = start + offset;
2824
2825 Py_MEMCPY(result_s, start, next-start);
2826
2827 result_s += (next-start);
2828 start = next+from_len;
2829 }
2830 Py_MEMCPY(result_s, start, end-start);
2831 return result;
2832}
2833
2834/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002835Py_LOCAL(PyStringObject *)
2836replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002837 char from_c, char to_c,
2838 Py_ssize_t maxcount)
2839{
2840 char *self_s, *result_s, *start, *end, *next;
2841 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002842 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002843
2844 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002845 self_s = PyString_AS_STRING(self);
2846 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002847
2848 next = findchar(self_s, self_len, from_c);
2849
2850 if (next == NULL) {
2851 /* No matches; return the original string */
2852 return return_self(self);
2853 }
2854
2855 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002856 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002857 if (result == NULL)
2858 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002859 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002860 Py_MEMCPY(result_s, self_s, self_len);
2861
2862 /* change everything in-place, starting with this one */
2863 start = result_s + (next-self_s);
2864 *start = to_c;
2865 start++;
2866 end = result_s + self_len;
2867
2868 while (--maxcount > 0) {
2869 next = findchar(start, end-start, from_c);
2870 if (next == NULL)
2871 break;
2872 *next = to_c;
2873 start = next+1;
2874 }
2875
2876 return result;
2877}
2878
2879/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002880Py_LOCAL(PyStringObject *)
2881replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002882 const char *from_s, Py_ssize_t from_len,
2883 const char *to_s, Py_ssize_t to_len,
2884 Py_ssize_t maxcount)
2885{
2886 char *result_s, *start, *end;
2887 char *self_s;
2888 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002889 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002890
2891 /* The result string will be the same size */
2892
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002893 self_s = PyString_AS_STRING(self);
2894 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002895
2896 offset = findstring(self_s, self_len,
2897 from_s, from_len,
2898 0, self_len, FORWARD);
2899 if (offset == -1) {
2900 /* No matches; return the original string */
2901 return return_self(self);
2902 }
2903
2904 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002905 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002906 if (result == NULL)
2907 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002908 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002909 Py_MEMCPY(result_s, self_s, self_len);
2910
2911 /* change everything in-place, starting with this one */
2912 start = result_s + offset;
2913 Py_MEMCPY(start, to_s, from_len);
2914 start += from_len;
2915 end = result_s + self_len;
2916
2917 while ( --maxcount > 0) {
2918 offset = findstring(start, end-start,
2919 from_s, from_len,
2920 0, end-start, FORWARD);
2921 if (offset==-1)
2922 break;
2923 Py_MEMCPY(start+offset, to_s, from_len);
2924 start += offset+from_len;
2925 }
2926
2927 return result;
2928}
2929
2930/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002931Py_LOCAL(PyStringObject *)
2932replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002933 char from_c,
2934 const char *to_s, Py_ssize_t to_len,
2935 Py_ssize_t maxcount)
2936{
2937 char *self_s, *result_s;
2938 char *start, *next, *end;
2939 Py_ssize_t self_len, result_len;
2940 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002941 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002942
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002943 self_s = PyString_AS_STRING(self);
2944 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002945
2946 count = countchar(self_s, self_len, from_c, maxcount);
2947 if (count == 0) {
2948 /* no matches, return unchanged */
2949 return return_self(self);
2950 }
2951
2952 /* use the difference between current and new, hence the "-1" */
2953 /* result_len = self_len + count * (to_len-1) */
2954 product = count * (to_len-1);
2955 if (product / (to_len-1) != count) {
2956 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2957 return NULL;
2958 }
2959 result_len = self_len + product;
2960 if (result_len < 0) {
2961 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2962 return NULL;
2963 }
2964
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002965 if ( (result = (PyStringObject *)
2966 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002967 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002968 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002969
2970 start = self_s;
2971 end = self_s + self_len;
2972 while (count-- > 0) {
2973 next = findchar(start, end-start, from_c);
2974 if (next == NULL)
2975 break;
2976
2977 if (next == start) {
2978 /* replace with the 'to' */
2979 Py_MEMCPY(result_s, to_s, to_len);
2980 result_s += to_len;
2981 start += 1;
2982 } else {
2983 /* copy the unchanged old then the 'to' */
2984 Py_MEMCPY(result_s, start, next-start);
2985 result_s += (next-start);
2986 Py_MEMCPY(result_s, to_s, to_len);
2987 result_s += to_len;
2988 start = next+1;
2989 }
2990 }
2991 /* Copy the remainder of the remaining string */
2992 Py_MEMCPY(result_s, start, end-start);
2993
2994 return result;
2995}
2996
2997/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002998Py_LOCAL(PyStringObject *)
2999replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003000 const char *from_s, Py_ssize_t from_len,
3001 const char *to_s, Py_ssize_t to_len,
3002 Py_ssize_t maxcount) {
3003 char *self_s, *result_s;
3004 char *start, *next, *end;
3005 Py_ssize_t self_len, result_len;
3006 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003007 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003008
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003009 self_s = PyString_AS_STRING(self);
3010 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003011
3012 count = countstring(self_s, self_len,
3013 from_s, from_len,
3014 0, self_len, FORWARD, maxcount);
3015 if (count == 0) {
3016 /* no matches, return unchanged */
3017 return return_self(self);
3018 }
3019
3020 /* Check for overflow */
3021 /* result_len = self_len + count * (to_len-from_len) */
3022 product = count * (to_len-from_len);
3023 if (product / (to_len-from_len) != count) {
3024 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3025 return NULL;
3026 }
3027 result_len = self_len + product;
3028 if (result_len < 0) {
3029 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3030 return NULL;
3031 }
3032
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003033 if ( (result = (PyStringObject *)
3034 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003035 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003036 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003037
3038 start = self_s;
3039 end = self_s + self_len;
3040 while (count-- > 0) {
3041 offset = findstring(start, end-start,
3042 from_s, from_len,
3043 0, end-start, FORWARD);
3044 if (offset == -1)
3045 break;
3046 next = start+offset;
3047 if (next == start) {
3048 /* replace with the 'to' */
3049 Py_MEMCPY(result_s, to_s, to_len);
3050 result_s += to_len;
3051 start += from_len;
3052 } else {
3053 /* copy the unchanged old then the 'to' */
3054 Py_MEMCPY(result_s, start, next-start);
3055 result_s += (next-start);
3056 Py_MEMCPY(result_s, to_s, to_len);
3057 result_s += to_len;
3058 start = next+from_len;
3059 }
3060 }
3061 /* Copy the remainder of the remaining string */
3062 Py_MEMCPY(result_s, start, end-start);
3063
3064 return result;
3065}
3066
3067
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003068Py_LOCAL(PyStringObject *)
3069replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003070 const char *from_s, Py_ssize_t from_len,
3071 const char *to_s, Py_ssize_t to_len,
3072 Py_ssize_t maxcount)
3073{
3074 if (maxcount < 0) {
3075 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003076 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003077 /* nothing to do; return the original string */
3078 return return_self(self);
3079 }
3080
3081 if (maxcount == 0 ||
3082 (from_len == 0 && to_len == 0)) {
3083 /* nothing to do; return the original string */
3084 return return_self(self);
3085 }
3086
3087 /* Handle zero-length special cases */
3088
3089 if (from_len == 0) {
3090 /* insert the 'to' string everywhere. */
3091 /* >>> "Python".replace("", ".") */
3092 /* '.P.y.t.h.o.n.' */
3093 return replace_interleave(self, to_s, to_len, maxcount);
3094 }
3095
3096 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3097 /* point for an empty self string to generate a non-empty string */
3098 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003099 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003100 return return_self(self);
3101 }
3102
3103 if (to_len == 0) {
3104 /* delete all occurances of 'from' string */
3105 if (from_len == 1) {
3106 return replace_delete_single_character(
3107 self, from_s[0], maxcount);
3108 } else {
3109 return replace_delete_substring(self, from_s, from_len, maxcount);
3110 }
3111 }
3112
3113 /* Handle special case where both strings have the same length */
3114
3115 if (from_len == to_len) {
3116 if (from_len == 1) {
3117 return replace_single_character_in_place(
3118 self,
3119 from_s[0],
3120 to_s[0],
3121 maxcount);
3122 } else {
3123 return replace_substring_in_place(
3124 self, from_s, from_len, to_s, to_len, maxcount);
3125 }
3126 }
3127
3128 /* Otherwise use the more generic algorithms */
3129 if (from_len == 1) {
3130 return replace_single_character(self, from_s[0],
3131 to_s, to_len, maxcount);
3132 } else {
3133 /* len('from')>=2, len('to')>=1 */
3134 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3135 }
3136}
3137
3138PyDoc_STRVAR(replace__doc__,
3139"S.replace (old, new[, count]) -> string\n\
3140\n\
3141Return a copy of string S with all occurrences of substring\n\
3142old replaced by new. If the optional argument count is\n\
3143given, only the first count occurrences are replaced.");
3144
3145static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003146string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003147{
3148 Py_ssize_t count = -1;
3149 PyObject *from, *to;
3150 const char *from_s, *to_s;
3151 Py_ssize_t from_len, to_len;
3152
3153 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3154 return NULL;
3155
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003156 if (PyString_Check(from)) {
3157 from_s = PyString_AS_STRING(from);
3158 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00003159 }
3160#ifdef Py_USING_UNICODE
3161 if (PyUnicode_Check(from))
3162 return PyUnicode_Replace((PyObject *)self,
3163 from, to, count);
3164#endif
3165 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3166 return NULL;
3167
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003168 if (PyString_Check(to)) {
3169 to_s = PyString_AS_STRING(to);
3170 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00003171 }
3172#ifdef Py_USING_UNICODE
3173 else if (PyUnicode_Check(to))
3174 return PyUnicode_Replace((PyObject *)self,
3175 from, to, count);
3176#endif
3177 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3178 return NULL;
3179
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003180 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00003181 from_s, from_len,
3182 to_s, to_len, count);
3183}
3184
3185/** End DALKE **/
3186
3187/* Matches the end (direction >= 0) or start (direction < 0) of self
3188 * against substr, using the start and end arguments. Returns
3189 * -1 on error, 0 if not found and 1 if found.
3190 */
3191Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003192_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00003193 Py_ssize_t end, int direction)
3194{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003195 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003196 Py_ssize_t slen;
3197 const char* sub;
3198 const char* str;
3199
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003200 if (PyString_Check(substr)) {
3201 sub = PyString_AS_STRING(substr);
3202 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00003203 }
3204#ifdef Py_USING_UNICODE
3205 else if (PyUnicode_Check(substr))
3206 return PyUnicode_Tailmatch((PyObject *)self,
3207 substr, start, end, direction);
3208#endif
3209 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3210 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003211 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003212
3213 string_adjust_indices(&start, &end, len);
3214
3215 if (direction < 0) {
3216 /* startswith */
3217 if (start+slen > len)
3218 return 0;
3219 } else {
3220 /* endswith */
3221 if (end-start < slen || start > len)
3222 return 0;
3223
3224 if (end-slen > start)
3225 start = end - slen;
3226 }
3227 if (end-start >= slen)
3228 return ! memcmp(str+start, sub, slen);
3229 return 0;
3230}
3231
3232
3233PyDoc_STRVAR(startswith__doc__,
3234"S.startswith(prefix[, start[, end]]) -> bool\n\
3235\n\
3236Return True if S starts with the specified prefix, False otherwise.\n\
3237With optional start, test S beginning at that position.\n\
3238With optional end, stop comparing S at that position.\n\
3239prefix can also be a tuple of strings to try.");
3240
3241static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003242string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003243{
3244 Py_ssize_t start = 0;
3245 Py_ssize_t end = PY_SSIZE_T_MAX;
3246 PyObject *subobj;
3247 int result;
3248
3249 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3250 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3251 return NULL;
3252 if (PyTuple_Check(subobj)) {
3253 Py_ssize_t i;
3254 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3255 result = _string_tailmatch(self,
3256 PyTuple_GET_ITEM(subobj, i),
3257 start, end, -1);
3258 if (result == -1)
3259 return NULL;
3260 else if (result) {
3261 Py_RETURN_TRUE;
3262 }
3263 }
3264 Py_RETURN_FALSE;
3265 }
3266 result = _string_tailmatch(self, subobj, start, end, -1);
3267 if (result == -1)
3268 return NULL;
3269 else
3270 return PyBool_FromLong(result);
3271}
3272
3273
3274PyDoc_STRVAR(endswith__doc__,
3275"S.endswith(suffix[, start[, end]]) -> bool\n\
3276\n\
3277Return True if S ends with the specified suffix, False otherwise.\n\
3278With optional start, test S beginning at that position.\n\
3279With optional end, stop comparing S at that position.\n\
3280suffix can also be a tuple of strings to try.");
3281
3282static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003283string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003284{
3285 Py_ssize_t start = 0;
3286 Py_ssize_t end = PY_SSIZE_T_MAX;
3287 PyObject *subobj;
3288 int result;
3289
3290 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3291 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3292 return NULL;
3293 if (PyTuple_Check(subobj)) {
3294 Py_ssize_t i;
3295 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3296 result = _string_tailmatch(self,
3297 PyTuple_GET_ITEM(subobj, i),
3298 start, end, +1);
3299 if (result == -1)
3300 return NULL;
3301 else if (result) {
3302 Py_RETURN_TRUE;
3303 }
3304 }
3305 Py_RETURN_FALSE;
3306 }
3307 result = _string_tailmatch(self, subobj, start, end, +1);
3308 if (result == -1)
3309 return NULL;
3310 else
3311 return PyBool_FromLong(result);
3312}
3313
3314
3315PyDoc_STRVAR(encode__doc__,
3316"S.encode([encoding[,errors]]) -> object\n\
3317\n\
3318Encodes S using the codec registered for encoding. encoding defaults\n\
3319to the default encoding. errors may be given to set a different error\n\
3320handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3321a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3322'xmlcharrefreplace' as well as any other name registered with\n\
3323codecs.register_error that is able to handle UnicodeEncodeErrors.");
3324
3325static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003326string_encode(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003327{
3328 char *encoding = NULL;
3329 char *errors = NULL;
3330 PyObject *v;
3331
3332 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3333 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003334 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003335 if (v == NULL)
3336 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003337 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003338 PyErr_Format(PyExc_TypeError,
3339 "encoder did not return a string/unicode object "
3340 "(type=%.400s)",
3341 Py_TYPE(v)->tp_name);
3342 Py_DECREF(v);
3343 return NULL;
3344 }
3345 return v;
3346
3347 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003348 return NULL;
3349}
3350
Christian Heimes44720832008-05-26 13:01:01 +00003351
3352PyDoc_STRVAR(decode__doc__,
3353"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003354\n\
Christian Heimes44720832008-05-26 13:01:01 +00003355Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003356to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003357handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3358a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3359as well as any other name registerd with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003360able to handle UnicodeDecodeErrors.");
3361
3362static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003363string_decode(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003364{
Christian Heimes44720832008-05-26 13:01:01 +00003365 char *encoding = NULL;
3366 char *errors = NULL;
3367 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003368
3369 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3370 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003371 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003372 if (v == NULL)
3373 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003374 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003375 PyErr_Format(PyExc_TypeError,
3376 "decoder did not return a string/unicode object "
3377 "(type=%.400s)",
3378 Py_TYPE(v)->tp_name);
3379 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003380 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003381 }
3382 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003383
Christian Heimes44720832008-05-26 13:01:01 +00003384 onError:
3385 return NULL;
3386}
3387
3388
3389PyDoc_STRVAR(expandtabs__doc__,
3390"S.expandtabs([tabsize]) -> string\n\
3391\n\
3392Return a copy of S where all tab characters are expanded using spaces.\n\
3393If tabsize is not given, a tab size of 8 characters is assumed.");
3394
3395static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003396string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003397{
3398 const char *e, *p, *qe;
3399 char *q;
3400 Py_ssize_t i, j, incr;
3401 PyObject *u;
3402 int tabsize = 8;
3403
3404 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3405 return NULL;
3406
3407 /* First pass: determine size of output string */
3408 i = 0; /* chars up to and including most recent \n or \r */
3409 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003410 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3411 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003412 if (*p == '\t') {
3413 if (tabsize > 0) {
3414 incr = tabsize - (j % tabsize);
3415 if (j > PY_SSIZE_T_MAX - incr)
3416 goto overflow1;
3417 j += incr;
3418 }
3419 }
3420 else {
3421 if (j > PY_SSIZE_T_MAX - 1)
3422 goto overflow1;
3423 j++;
3424 if (*p == '\n' || *p == '\r') {
3425 if (i > PY_SSIZE_T_MAX - j)
3426 goto overflow1;
3427 i += j;
3428 j = 0;
3429 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003430 }
Christian Heimes44720832008-05-26 13:01:01 +00003431
3432 if (i > PY_SSIZE_T_MAX - j)
3433 goto overflow1;
3434
3435 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003436 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003437 if (!u)
3438 return NULL;
3439
3440 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003441 q = PyString_AS_STRING(u); /* next output char */
3442 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003443
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003444 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003445 if (*p == '\t') {
3446 if (tabsize > 0) {
3447 i = tabsize - (j % tabsize);
3448 j += i;
3449 while (i--) {
3450 if (q >= qe)
3451 goto overflow2;
3452 *q++ = ' ';
3453 }
3454 }
3455 }
3456 else {
3457 if (q >= qe)
3458 goto overflow2;
3459 *q++ = *p;
3460 j++;
3461 if (*p == '\n' || *p == '\r')
3462 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003463 }
Christian Heimes44720832008-05-26 13:01:01 +00003464
3465 return u;
3466
3467 overflow2:
3468 Py_DECREF(u);
3469 overflow1:
3470 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3471 return NULL;
3472}
3473
3474Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003475pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003476{
3477 PyObject *u;
3478
3479 if (left < 0)
3480 left = 0;
3481 if (right < 0)
3482 right = 0;
3483
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003484 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003485 Py_INCREF(self);
3486 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003487 }
3488
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003489 u = PyString_FromStringAndSize(NULL,
3490 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003491 if (u) {
3492 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003493 memset(PyString_AS_STRING(u), fill, left);
3494 Py_MEMCPY(PyString_AS_STRING(u) + left,
3495 PyString_AS_STRING(self),
3496 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003497 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003498 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003499 fill, right);
3500 }
3501
3502 return u;
3503}
3504
3505PyDoc_STRVAR(ljust__doc__,
3506"S.ljust(width[, fillchar]) -> string\n"
3507"\n"
3508"Return S left justified in a string of length width. Padding is\n"
3509"done using the specified fill character (default is a space).");
3510
3511static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003512string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003513{
3514 Py_ssize_t width;
3515 char fillchar = ' ';
3516
3517 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3518 return NULL;
3519
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003520 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003521 Py_INCREF(self);
3522 return (PyObject*) self;
3523 }
3524
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003525 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003526}
3527
3528
3529PyDoc_STRVAR(rjust__doc__,
3530"S.rjust(width[, fillchar]) -> string\n"
3531"\n"
3532"Return S right justified in a string of length width. Padding is\n"
3533"done using the specified fill character (default is a space)");
3534
3535static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003536string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003537{
3538 Py_ssize_t width;
3539 char fillchar = ' ';
3540
3541 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3542 return NULL;
3543
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003544 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003545 Py_INCREF(self);
3546 return (PyObject*) self;
3547 }
3548
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003549 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003550}
3551
3552
3553PyDoc_STRVAR(center__doc__,
3554"S.center(width[, fillchar]) -> string\n"
3555"\n"
3556"Return S centered in a string of length width. Padding is\n"
3557"done using the specified fill character (default is a space)");
3558
3559static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003560string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003561{
3562 Py_ssize_t marg, left;
3563 Py_ssize_t width;
3564 char fillchar = ' ';
3565
3566 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3567 return NULL;
3568
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003569 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003570 Py_INCREF(self);
3571 return (PyObject*) self;
3572 }
3573
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003574 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003575 left = marg / 2 + (marg & width & 1);
3576
3577 return pad(self, left, marg - left, fillchar);
3578}
3579
3580PyDoc_STRVAR(zfill__doc__,
3581"S.zfill(width) -> string\n"
3582"\n"
3583"Pad a numeric string S with zeros on the left, to fill a field\n"
3584"of the specified width. The string S is never truncated.");
3585
3586static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003587string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003588{
3589 Py_ssize_t fill;
3590 PyObject *s;
3591 char *p;
3592 Py_ssize_t width;
3593
3594 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3595 return NULL;
3596
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003597 if (PyString_GET_SIZE(self) >= width) {
3598 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003599 Py_INCREF(self);
3600 return (PyObject*) self;
3601 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003602 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003603 return PyString_FromStringAndSize(
3604 PyString_AS_STRING(self),
3605 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003606 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003607 }
3608
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003609 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003610
Christian Heimes44720832008-05-26 13:01:01 +00003611 s = pad(self, fill, 0, '0');
3612
3613 if (s == NULL)
3614 return NULL;
3615
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003616 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003617 if (p[fill] == '+' || p[fill] == '-') {
3618 /* move sign to beginning of string */
3619 p[0] = p[fill];
3620 p[fill] = '0';
3621 }
3622
3623 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003624}
3625
Christian Heimes44720832008-05-26 13:01:01 +00003626PyDoc_STRVAR(isspace__doc__,
3627"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003628\n\
Christian Heimes44720832008-05-26 13:01:01 +00003629Return True if all characters in S are whitespace\n\
3630and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003631
Christian Heimes44720832008-05-26 13:01:01 +00003632static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003633string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003634{
Christian Heimes44720832008-05-26 13:01:01 +00003635 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003636 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003637 register const unsigned char *e;
3638
3639 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003640 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003641 isspace(*p))
3642 return PyBool_FromLong(1);
3643
3644 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003645 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003646 return PyBool_FromLong(0);
3647
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003648 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003649 for (; p < e; p++) {
3650 if (!isspace(*p))
3651 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003652 }
Christian Heimes44720832008-05-26 13:01:01 +00003653 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003654}
3655
Christian Heimes44720832008-05-26 13:01:01 +00003656
3657PyDoc_STRVAR(isalpha__doc__,
3658"S.isalpha() -> bool\n\
3659\n\
3660Return True if all characters in S are alphabetic\n\
3661and there is at least one character in S, False otherwise.");
3662
3663static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003664string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003665{
Christian Heimes44720832008-05-26 13:01:01 +00003666 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003667 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003668 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003669
Christian Heimes44720832008-05-26 13:01:01 +00003670 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003671 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003672 isalpha(*p))
3673 return PyBool_FromLong(1);
3674
3675 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003676 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003677 return PyBool_FromLong(0);
3678
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003679 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003680 for (; p < e; p++) {
3681 if (!isalpha(*p))
3682 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003683 }
Christian Heimes44720832008-05-26 13:01:01 +00003684 return PyBool_FromLong(1);
3685}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003686
Christian Heimes44720832008-05-26 13:01:01 +00003687
3688PyDoc_STRVAR(isalnum__doc__,
3689"S.isalnum() -> bool\n\
3690\n\
3691Return True if all characters in S are alphanumeric\n\
3692and there is at least one character in S, False otherwise.");
3693
3694static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003695string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003696{
3697 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003698 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003699 register const unsigned char *e;
3700
3701 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003702 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003703 isalnum(*p))
3704 return PyBool_FromLong(1);
3705
3706 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003707 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003708 return PyBool_FromLong(0);
3709
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003710 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003711 for (; p < e; p++) {
3712 if (!isalnum(*p))
3713 return PyBool_FromLong(0);
3714 }
3715 return PyBool_FromLong(1);
3716}
3717
3718
3719PyDoc_STRVAR(isdigit__doc__,
3720"S.isdigit() -> bool\n\
3721\n\
3722Return True if all characters in S are digits\n\
3723and there is at least one character in S, False otherwise.");
3724
3725static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003726string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003727{
3728 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003729 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003730 register const unsigned char *e;
3731
3732 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003733 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003734 isdigit(*p))
3735 return PyBool_FromLong(1);
3736
3737 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003738 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003739 return PyBool_FromLong(0);
3740
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003741 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003742 for (; p < e; p++) {
3743 if (!isdigit(*p))
3744 return PyBool_FromLong(0);
3745 }
3746 return PyBool_FromLong(1);
3747}
3748
3749
3750PyDoc_STRVAR(islower__doc__,
3751"S.islower() -> bool\n\
3752\n\
3753Return True if all cased characters in S are lowercase and there is\n\
3754at least one cased character in S, False otherwise.");
3755
3756static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003757string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003758{
3759 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003760 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003761 register const unsigned char *e;
3762 int cased;
3763
3764 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003765 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003766 return PyBool_FromLong(islower(*p) != 0);
3767
3768 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003769 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003770 return PyBool_FromLong(0);
3771
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003772 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003773 cased = 0;
3774 for (; p < e; p++) {
3775 if (isupper(*p))
3776 return PyBool_FromLong(0);
3777 else if (!cased && islower(*p))
3778 cased = 1;
3779 }
3780 return PyBool_FromLong(cased);
3781}
3782
3783
3784PyDoc_STRVAR(isupper__doc__,
3785"S.isupper() -> bool\n\
3786\n\
3787Return True if all cased characters in S are uppercase and there is\n\
3788at least one cased character in S, False otherwise.");
3789
3790static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003791string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003792{
3793 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003794 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003795 register const unsigned char *e;
3796 int cased;
3797
3798 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003799 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003800 return PyBool_FromLong(isupper(*p) != 0);
3801
3802 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003803 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003804 return PyBool_FromLong(0);
3805
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003806 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003807 cased = 0;
3808 for (; p < e; p++) {
3809 if (islower(*p))
3810 return PyBool_FromLong(0);
3811 else if (!cased && isupper(*p))
3812 cased = 1;
3813 }
3814 return PyBool_FromLong(cased);
3815}
3816
3817
3818PyDoc_STRVAR(istitle__doc__,
3819"S.istitle() -> bool\n\
3820\n\
3821Return True if S is a titlecased string and there is at least one\n\
3822character in S, i.e. uppercase characters may only follow uncased\n\
3823characters and lowercase characters only cased ones. Return False\n\
3824otherwise.");
3825
3826static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003827string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003828{
3829 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003830 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003831 register const unsigned char *e;
3832 int cased, previous_is_cased;
3833
3834 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003835 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003836 return PyBool_FromLong(isupper(*p) != 0);
3837
3838 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003839 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003840 return PyBool_FromLong(0);
3841
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003842 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003843 cased = 0;
3844 previous_is_cased = 0;
3845 for (; p < e; p++) {
3846 register const unsigned char ch = *p;
3847
3848 if (isupper(ch)) {
3849 if (previous_is_cased)
3850 return PyBool_FromLong(0);
3851 previous_is_cased = 1;
3852 cased = 1;
3853 }
3854 else if (islower(ch)) {
3855 if (!previous_is_cased)
3856 return PyBool_FromLong(0);
3857 previous_is_cased = 1;
3858 cased = 1;
3859 }
3860 else
3861 previous_is_cased = 0;
3862 }
3863 return PyBool_FromLong(cased);
3864}
3865
3866
3867PyDoc_STRVAR(splitlines__doc__,
3868"S.splitlines([keepends]) -> list of strings\n\
3869\n\
3870Return a list of the lines in S, breaking at line boundaries.\n\
3871Line breaks are not included in the resulting list unless keepends\n\
3872is given and true.");
3873
3874static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003875string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003876{
3877 register Py_ssize_t i;
3878 register Py_ssize_t j;
3879 Py_ssize_t len;
3880 int keepends = 0;
3881 PyObject *list;
3882 PyObject *str;
3883 char *data;
3884
3885 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3886 return NULL;
3887
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003888 data = PyString_AS_STRING(self);
3889 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003890
3891 /* This does not use the preallocated list because splitlines is
3892 usually run with hundreds of newlines. The overhead of
3893 switching between PyList_SET_ITEM and append causes about a
3894 2-3% slowdown for that common case. A smarter implementation
3895 could move the if check out, so the SET_ITEMs are done first
3896 and the appends only done when the prealloc buffer is full.
3897 That's too much work for little gain.*/
3898
3899 list = PyList_New(0);
3900 if (!list)
3901 goto onError;
3902
3903 for (i = j = 0; i < len; ) {
3904 Py_ssize_t eol;
3905
3906 /* Find a line and append it */
3907 while (i < len && data[i] != '\n' && data[i] != '\r')
3908 i++;
3909
3910 /* Skip the line break reading CRLF as one line break */
3911 eol = i;
3912 if (i < len) {
3913 if (data[i] == '\r' && i + 1 < len &&
3914 data[i+1] == '\n')
3915 i += 2;
3916 else
3917 i++;
3918 if (keepends)
3919 eol = i;
3920 }
3921 SPLIT_APPEND(data, j, eol);
3922 j = i;
3923 }
3924 if (j < len) {
3925 SPLIT_APPEND(data, j, len);
3926 }
3927
3928 return list;
3929
3930 onError:
3931 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003932 return NULL;
3933}
3934
Robert Schuppenies51df0642008-06-01 16:16:17 +00003935PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003936"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003937
3938static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003939string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003940{
3941 Py_ssize_t res;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003942 res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00003943 return PyInt_FromSsize_t(res);
3944}
3945
Christian Heimes44720832008-05-26 13:01:01 +00003946#undef SPLIT_APPEND
3947#undef SPLIT_ADD
3948#undef MAX_PREALLOC
3949#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003950
3951static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003952string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003953{
Christian Heimes44720832008-05-26 13:01:01 +00003954 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003955}
3956
Christian Heimes1a6387e2008-03-26 12:49:49 +00003957
Christian Heimes44720832008-05-26 13:01:01 +00003958#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003959
Christian Heimes44720832008-05-26 13:01:01 +00003960PyDoc_STRVAR(format__doc__,
3961"S.format(*args, **kwargs) -> unicode\n\
3962\n\
3963");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003964
Eric Smithdc13b792008-05-30 18:10:04 +00003965static PyObject *
3966string__format__(PyObject* self, PyObject* args)
3967{
3968 PyObject *format_spec;
3969 PyObject *result = NULL;
3970 PyObject *tmp = NULL;
3971
3972 /* If 2.x, convert format_spec to the same type as value */
3973 /* This is to allow things like u''.format('') */
3974 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3975 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003976 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00003977 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3978 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3979 goto done;
3980 }
3981 tmp = PyObject_Str(format_spec);
3982 if (tmp == NULL)
3983 goto done;
3984 format_spec = tmp;
3985
3986 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003987 PyString_AS_STRING(format_spec),
3988 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003989done:
3990 Py_XDECREF(tmp);
3991 return result;
3992}
3993
Christian Heimes44720832008-05-26 13:01:01 +00003994PyDoc_STRVAR(p_format__doc__,
3995"S.__format__(format_spec) -> unicode\n\
3996\n\
3997");
3998
3999
Christian Heimes1a6387e2008-03-26 12:49:49 +00004000static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00004001string_methods[] = {
4002 /* Counterparts of the obsolete stropmodule functions; except
4003 string.maketrans(). */
4004 {"join", (PyCFunction)string_join, METH_O, join__doc__},
4005 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4006 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4007 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4008 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4009 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4010 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4011 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4012 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4013 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4014 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4015 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4016 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4017 capitalize__doc__},
4018 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4019 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4020 endswith__doc__},
4021 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4022 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4023 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4024 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4025 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4026 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4027 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4028 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4029 {"rpartition", (PyCFunction)string_rpartition, METH_O,
4030 rpartition__doc__},
4031 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4032 startswith__doc__},
4033 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4034 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4035 swapcase__doc__},
4036 {"translate", (PyCFunction)string_translate, METH_VARARGS,
4037 translate__doc__},
4038 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4039 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4040 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4041 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4042 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4043 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4044 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4045 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4046 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4047 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4048 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4049 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4050 expandtabs__doc__},
4051 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4052 splitlines__doc__},
Robert Schuppenies51df0642008-06-01 16:16:17 +00004053 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4054 sizeof__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00004055 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4056 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00004057};
4058
4059static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004060str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004061
Christian Heimes44720832008-05-26 13:01:01 +00004062static PyObject *
4063string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4064{
4065 PyObject *x = NULL;
4066 static char *kwlist[] = {"object", 0};
4067
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004068 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00004069 return str_subtype_new(type, args, kwds);
4070 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4071 return NULL;
4072 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004073 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00004074 return PyObject_Str(x);
4075}
4076
4077static PyObject *
4078str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4079{
4080 PyObject *tmp, *pnew;
4081 Py_ssize_t n;
4082
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004083 assert(PyType_IsSubtype(type, &PyString_Type));
4084 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00004085 if (tmp == NULL)
4086 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004087 assert(PyString_CheckExact(tmp));
4088 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00004089 pnew = type->tp_alloc(type, n);
4090 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004091 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4092 ((PyStringObject *)pnew)->ob_shash =
4093 ((PyStringObject *)tmp)->ob_shash;
4094 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00004095 }
4096 Py_DECREF(tmp);
4097 return pnew;
4098}
4099
4100static PyObject *
4101basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4102{
4103 PyErr_SetString(PyExc_TypeError,
4104 "The basestring type cannot be instantiated");
4105 return NULL;
4106}
4107
4108static PyObject *
4109string_mod(PyObject *v, PyObject *w)
4110{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004111 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004112 Py_INCREF(Py_NotImplemented);
4113 return Py_NotImplemented;
4114 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004115 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004116}
4117
4118PyDoc_STRVAR(basestring_doc,
4119"Type basestring cannot be instantiated; it is the base for str and unicode.");
4120
4121static PyNumberMethods string_as_number = {
4122 0, /*nb_add*/
4123 0, /*nb_subtract*/
4124 0, /*nb_multiply*/
4125 0, /*nb_divide*/
4126 string_mod, /*nb_remainder*/
4127};
4128
4129
4130PyTypeObject PyBaseString_Type = {
4131 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4132 "basestring",
4133 0,
4134 0,
4135 0, /* tp_dealloc */
4136 0, /* tp_print */
4137 0, /* tp_getattr */
4138 0, /* tp_setattr */
4139 0, /* tp_compare */
4140 0, /* tp_repr */
4141 0, /* tp_as_number */
4142 0, /* tp_as_sequence */
4143 0, /* tp_as_mapping */
4144 0, /* tp_hash */
4145 0, /* tp_call */
4146 0, /* tp_str */
4147 0, /* tp_getattro */
4148 0, /* tp_setattro */
4149 0, /* tp_as_buffer */
4150 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4151 basestring_doc, /* tp_doc */
4152 0, /* tp_traverse */
4153 0, /* tp_clear */
4154 0, /* tp_richcompare */
4155 0, /* tp_weaklistoffset */
4156 0, /* tp_iter */
4157 0, /* tp_iternext */
4158 0, /* tp_methods */
4159 0, /* tp_members */
4160 0, /* tp_getset */
4161 &PyBaseObject_Type, /* tp_base */
4162 0, /* tp_dict */
4163 0, /* tp_descr_get */
4164 0, /* tp_descr_set */
4165 0, /* tp_dictoffset */
4166 0, /* tp_init */
4167 0, /* tp_alloc */
4168 basestring_new, /* tp_new */
4169 0, /* tp_free */
4170};
4171
4172PyDoc_STRVAR(string_doc,
4173"str(object) -> string\n\
4174\n\
4175Return a nice string representation of the object.\n\
4176If the argument is a string, the return value is the same object.");
4177
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004178PyTypeObject PyString_Type = {
Christian Heimes44720832008-05-26 13:01:01 +00004179 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4180 "str",
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004181 sizeof(PyStringObject),
Christian Heimes44720832008-05-26 13:01:01 +00004182 sizeof(char),
4183 string_dealloc, /* tp_dealloc */
4184 (printfunc)string_print, /* tp_print */
4185 0, /* tp_getattr */
4186 0, /* tp_setattr */
4187 0, /* tp_compare */
4188 string_repr, /* tp_repr */
4189 &string_as_number, /* tp_as_number */
4190 &string_as_sequence, /* tp_as_sequence */
4191 &string_as_mapping, /* tp_as_mapping */
4192 (hashfunc)string_hash, /* tp_hash */
4193 0, /* tp_call */
4194 string_str, /* tp_str */
4195 PyObject_GenericGetAttr, /* tp_getattro */
4196 0, /* tp_setattro */
4197 &string_as_buffer, /* tp_as_buffer */
4198 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4199 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4200 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4201 string_doc, /* tp_doc */
4202 0, /* tp_traverse */
4203 0, /* tp_clear */
4204 (richcmpfunc)string_richcompare, /* tp_richcompare */
4205 0, /* tp_weaklistoffset */
4206 0, /* tp_iter */
4207 0, /* tp_iternext */
4208 string_methods, /* tp_methods */
4209 0, /* tp_members */
4210 0, /* tp_getset */
4211 &PyBaseString_Type, /* tp_base */
4212 0, /* tp_dict */
4213 0, /* tp_descr_get */
4214 0, /* tp_descr_set */
4215 0, /* tp_dictoffset */
4216 0, /* tp_init */
4217 0, /* tp_alloc */
4218 string_new, /* tp_new */
4219 PyObject_Del, /* tp_free */
4220};
4221
4222void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004223PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004224{
4225 register PyObject *v;
4226 if (*pv == NULL)
4227 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004228 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00004229 Py_DECREF(*pv);
4230 *pv = NULL;
4231 return;
4232 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004233 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004234 Py_DECREF(*pv);
4235 *pv = v;
4236}
4237
4238void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004239PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004240{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004241 PyString_Concat(pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004242 Py_XDECREF(w);
4243}
4244
4245
4246/* The following function breaks the notion that strings are immutable:
4247 it changes the size of a string. We get away with this only if there
4248 is only one module referencing the object. You can also think of it
4249 as creating a new string object and destroying the old one, only
4250 more efficiently. In any case, don't use this if the string may
4251 already be known to some other part of the code...
4252 Note that if there's not enough memory to resize the string, the original
4253 string object at *pv is deallocated, *pv is set to NULL, an "out of
4254 memory" exception is set, and -1 is returned. Else (on success) 0 is
4255 returned, and the value in *pv may or may not be the same as on input.
4256 As always, an extra byte is allocated for a trailing \0 byte (newsize
4257 does *not* include that), and a trailing \0 byte is stored.
4258*/
4259
4260int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004261_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00004262{
4263 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004264 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00004265 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004266 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4267 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004268 *pv = 0;
4269 Py_DECREF(v);
4270 PyErr_BadInternalCall();
4271 return -1;
4272 }
4273 /* XXX UNREF/NEWREF interface should be more symmetrical */
4274 _Py_DEC_REFTOTAL;
4275 _Py_ForgetReference(v);
4276 *pv = (PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004277 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00004278 if (*pv == NULL) {
4279 PyObject_Del(v);
4280 PyErr_NoMemory();
4281 return -1;
4282 }
4283 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004284 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00004285 Py_SIZE(sv) = newsize;
4286 sv->ob_sval[newsize] = '\0';
4287 sv->ob_shash = -1; /* invalidate cached hash value */
4288 return 0;
4289}
4290
4291/* Helpers for formatstring */
4292
4293Py_LOCAL_INLINE(PyObject *)
4294getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4295{
4296 Py_ssize_t argidx = *p_argidx;
4297 if (argidx < arglen) {
4298 (*p_argidx)++;
4299 if (arglen < 0)
4300 return args;
4301 else
4302 return PyTuple_GetItem(args, argidx);
4303 }
4304 PyErr_SetString(PyExc_TypeError,
4305 "not enough arguments for format string");
4306 return NULL;
4307}
4308
/* Bit flags recording which flag characters appeared in a conversion
 * specification:
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4321
4322Py_LOCAL_INLINE(int)
4323formatfloat(char *buf, size_t buflen, int flags,
4324 int prec, int type, PyObject *v)
4325{
4326 /* fmt = '%#.' + `prec` + `type`
4327 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4328 char fmt[20];
4329 double x;
4330 x = PyFloat_AsDouble(v);
4331 if (x == -1.0 && PyErr_Occurred()) {
4332 PyErr_Format(PyExc_TypeError, "float argument required, "
4333 "not %.200s", Py_TYPE(v)->tp_name);
4334 return -1;
4335 }
4336 if (prec < 0)
4337 prec = 6;
Eric Smithd6c393a2008-07-17 19:49:47 +00004338 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4339 type = 'g';
Christian Heimes44720832008-05-26 13:01:01 +00004340 /* Worst case length calc to ensure no buffer overrun:
4341
4342 'g' formats:
4343 fmt = %#.<prec>g
4344 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4345 for any double rep.)
4346 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4347
4348 'f' formats:
4349 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4350 len = 1 + 50 + 1 + prec = 52 + prec
4351
4352 If prec=0 the effective precision is 1 (the leading digit is
4353 always given), therefore increase the length by one.
4354
4355 */
4356 if (((type == 'g' || type == 'G') &&
4357 buflen <= (size_t)10 + (size_t)prec) ||
Eric Smithd6c393a2008-07-17 19:49:47 +00004358 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Christian Heimes44720832008-05-26 13:01:01 +00004359 PyErr_SetString(PyExc_OverflowError,
4360 "formatted float is too long (precision too large?)");
4361 return -1;
4362 }
4363 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4364 (flags&F_ALT) ? "#" : "",
4365 prec, type);
4366 PyOS_ascii_formatd(buf, buflen, fmt, x);
4367 return (int)strlen(buf);
4368}
4369
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004370/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004371 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4372 * Python's regular ints.
4373 * Return value: a new PyString*, or NULL if error.
4374 * . *pbuf is set to point into it,
4375 * *plen set to the # of chars following that.
4376 * Caller must decref it when done using pbuf.
4377 * The string starting at *pbuf is of the form
4378 * "-"? ("0x" | "0X")? digit+
4379 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4380 * set in flags. The case of hex digits will be correct,
4381 * There will be at least prec digits, zero-filled on the left if
4382 * necessary to get that many.
4383 * val object to be converted
4384 * flags bitmask of format flags; only F_ALT is looked at
4385 * prec minimum number of digits; 0-fill on left if needed
4386 * type a character in [duoxX]; u acts the same as d
4387 *
4388 * CAUTION: o, x and X conversions on regular ints can never
4389 * produce a '-' sign, but can for Python's unbounded ints.
4390 */
4391PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004392_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00004393 char **pbuf, int *plen)
4394{
4395 PyObject *result = NULL;
4396 char *buf;
4397 Py_ssize_t i;
4398 int sign; /* 1 if '-', else 0 */
4399 int len; /* number of characters */
4400 Py_ssize_t llen;
4401 int numdigits; /* len == numnondigits + numdigits */
4402 int numnondigits = 0;
4403
4404 switch (type) {
4405 case 'd':
4406 case 'u':
4407 result = Py_TYPE(val)->tp_str(val);
4408 break;
4409 case 'o':
4410 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4411 break;
4412 case 'x':
4413 case 'X':
4414 numnondigits = 2;
4415 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4416 break;
4417 default:
4418 assert(!"'type' not in [duoxX]");
4419 }
4420 if (!result)
4421 return NULL;
4422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004423 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004424 if (!buf) {
4425 Py_DECREF(result);
4426 return NULL;
4427 }
4428
4429 /* To modify the string in-place, there can only be one reference. */
4430 if (Py_REFCNT(result) != 1) {
4431 PyErr_BadInternalCall();
4432 return NULL;
4433 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004434 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004435 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004436 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004437 return NULL;
4438 }
4439 len = (int)llen;
4440 if (buf[len-1] == 'L') {
4441 --len;
4442 buf[len] = '\0';
4443 }
4444 sign = buf[0] == '-';
4445 numnondigits += sign;
4446 numdigits = len - numnondigits;
4447 assert(numdigits > 0);
4448
4449 /* Get rid of base marker unless F_ALT */
4450 if ((flags & F_ALT) == 0) {
4451 /* Need to skip 0x, 0X or 0. */
4452 int skipped = 0;
4453 switch (type) {
4454 case 'o':
4455 assert(buf[sign] == '0');
4456 /* If 0 is only digit, leave it alone. */
4457 if (numdigits > 1) {
4458 skipped = 1;
4459 --numdigits;
4460 }
4461 break;
4462 case 'x':
4463 case 'X':
4464 assert(buf[sign] == '0');
4465 assert(buf[sign + 1] == 'x');
4466 skipped = 2;
4467 numnondigits -= 2;
4468 break;
4469 }
4470 if (skipped) {
4471 buf += skipped;
4472 len -= skipped;
4473 if (sign)
4474 buf[0] = '-';
4475 }
4476 assert(len == numnondigits + numdigits);
4477 assert(numdigits > 0);
4478 }
4479
4480 /* Fill with leading zeroes to meet minimum width. */
4481 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004482 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004483 numnondigits + prec);
4484 char *b1;
4485 if (!r1) {
4486 Py_DECREF(result);
4487 return NULL;
4488 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004489 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004490 for (i = 0; i < numnondigits; ++i)
4491 *b1++ = *buf++;
4492 for (i = 0; i < prec - numdigits; i++)
4493 *b1++ = '0';
4494 for (i = 0; i < numdigits; i++)
4495 *b1++ = *buf++;
4496 *b1 = '\0';
4497 Py_DECREF(result);
4498 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004499 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004500 len = numnondigits + prec;
4501 }
4502
4503 /* Fix up case for hex conversions. */
4504 if (type == 'X') {
4505 /* Need to convert all lower case letters to upper case.
4506 and need to convert 0x to 0X (and -0x to -0X). */
4507 for (i = 0; i < len; i++)
4508 if (buf[i] >= 'a' && buf[i] <= 'x')
4509 buf[i] -= 'a'-'A';
4510 }
4511 *pbuf = buf;
4512 *plen = len;
4513 return result;
4514}
4515
4516Py_LOCAL_INLINE(int)
4517formatint(char *buf, size_t buflen, int flags,
4518 int prec, int type, PyObject *v)
4519{
4520 /* fmt = '%#.' + `prec` + 'l' + `type`
4521 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4522 + 1 + 1 = 24 */
4523 char fmt[64]; /* plenty big enough! */
4524 char *sign;
4525 long x;
4526
4527 x = PyInt_AsLong(v);
4528 if (x == -1 && PyErr_Occurred()) {
4529 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4530 Py_TYPE(v)->tp_name);
4531 return -1;
4532 }
4533 if (x < 0 && type == 'u') {
4534 type = 'd';
4535 }
4536 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4537 sign = "-";
4538 else
4539 sign = "";
4540 if (prec < 0)
4541 prec = 1;
4542
4543 if ((flags & F_ALT) &&
4544 (type == 'x' || type == 'X')) {
4545 /* When converting under %#x or %#X, there are a number
4546 * of issues that cause pain:
4547 * - when 0 is being converted, the C standard leaves off
4548 * the '0x' or '0X', which is inconsistent with other
4549 * %#x/%#X conversions and inconsistent with Python's
4550 * hex() function
4551 * - there are platforms that violate the standard and
4552 * convert 0 with the '0x' or '0X'
4553 * (Metrowerks, Compaq Tru64)
4554 * - there are platforms that give '0x' when converting
4555 * under %#X, but convert 0 in accordance with the
4556 * standard (OS/2 EMX)
4557 *
4558 * We can achieve the desired consistency by inserting our
4559 * own '0x' or '0X' prefix, and substituting %x/%X in place
4560 * of %#x/%#X.
4561 *
4562 * Note that this is the same approach as used in
4563 * formatint() in unicodeobject.c
4564 */
4565 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4566 sign, type, prec, type);
4567 }
4568 else {
4569 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4570 sign, (flags&F_ALT) ? "#" : "",
4571 prec, type);
4572 }
4573
4574 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4575 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4576 */
4577 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4578 PyErr_SetString(PyExc_OverflowError,
4579 "formatted integer is too long (precision too large?)");
4580 return -1;
4581 }
4582 if (sign[0])
4583 PyOS_snprintf(buf, buflen, fmt, -x);
4584 else
4585 PyOS_snprintf(buf, buflen, fmt, x);
4586 return (int)strlen(buf);
4587}
4588
4589Py_LOCAL_INLINE(int)
4590formatchar(char *buf, size_t buflen, PyObject *v)
4591{
4592 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004593 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004594 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4595 return -1;
4596 }
4597 else {
4598 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4599 return -1;
4600 }
4601 buf[1] = '\0';
4602 return 1;
4603}
4604
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used.
*/
#define FORMATBUFLEN ((size_t)120)
4614
4615PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004616PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004617{
4618 char *fmt, *res;
4619 Py_ssize_t arglen, argidx;
4620 Py_ssize_t reslen, rescnt, fmtcnt;
4621 int args_owned = 0;
4622 PyObject *result, *orig_args;
4623#ifdef Py_USING_UNICODE
4624 PyObject *v, *w;
4625#endif
4626 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004627 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004628 PyErr_BadInternalCall();
4629 return NULL;
4630 }
4631 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004632 fmt = PyString_AS_STRING(format);
4633 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004634 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004635 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004636 if (result == NULL)
4637 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004638 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004639 if (PyTuple_Check(args)) {
4640 arglen = PyTuple_GET_SIZE(args);
4641 argidx = 0;
4642 }
4643 else {
4644 arglen = -1;
4645 argidx = -2;
4646 }
4647 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4648 !PyObject_TypeCheck(args, &PyBaseString_Type))
4649 dict = args;
4650 while (--fmtcnt >= 0) {
4651 if (*fmt != '%') {
4652 if (--rescnt < 0) {
4653 rescnt = fmtcnt + 100;
4654 reslen += rescnt;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004655 if (_PyString_Resize(&result, reslen) < 0)
Christian Heimes44720832008-05-26 13:01:01 +00004656 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004657 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004658 + reslen - rescnt;
4659 --rescnt;
4660 }
4661 *res++ = *fmt++;
4662 }
4663 else {
4664 /* Got a format specifier */
4665 int flags = 0;
4666 Py_ssize_t width = -1;
4667 int prec = -1;
4668 int c = '\0';
4669 int fill;
4670 int isnumok;
4671 PyObject *v = NULL;
4672 PyObject *temp = NULL;
4673 char *pbuf;
4674 int sign;
4675 Py_ssize_t len;
4676 char formatbuf[FORMATBUFLEN];
4677 /* For format{float,int,char}() */
4678#ifdef Py_USING_UNICODE
4679 char *fmt_start = fmt;
4680 Py_ssize_t argidx_start = argidx;
4681#endif
4682
4683 fmt++;
4684 if (*fmt == '(') {
4685 char *keystart;
4686 Py_ssize_t keylen;
4687 PyObject *key;
4688 int pcount = 1;
4689
4690 if (dict == NULL) {
4691 PyErr_SetString(PyExc_TypeError,
4692 "format requires a mapping");
4693 goto error;
4694 }
4695 ++fmt;
4696 --fmtcnt;
4697 keystart = fmt;
4698 /* Skip over balanced parentheses */
4699 while (pcount > 0 && --fmtcnt >= 0) {
4700 if (*fmt == ')')
4701 --pcount;
4702 else if (*fmt == '(')
4703 ++pcount;
4704 fmt++;
4705 }
4706 keylen = fmt - keystart - 1;
4707 if (fmtcnt < 0 || pcount > 0) {
4708 PyErr_SetString(PyExc_ValueError,
4709 "incomplete format key");
4710 goto error;
4711 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004712 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004713 keylen);
4714 if (key == NULL)
4715 goto error;
4716 if (args_owned) {
4717 Py_DECREF(args);
4718 args_owned = 0;
4719 }
4720 args = PyObject_GetItem(dict, key);
4721 Py_DECREF(key);
4722 if (args == NULL) {
4723 goto error;
4724 }
4725 args_owned = 1;
4726 arglen = -1;
4727 argidx = -2;
4728 }
4729 while (--fmtcnt >= 0) {
4730 switch (c = *fmt++) {
4731 case '-': flags |= F_LJUST; continue;
4732 case '+': flags |= F_SIGN; continue;
4733 case ' ': flags |= F_BLANK; continue;
4734 case '#': flags |= F_ALT; continue;
4735 case '0': flags |= F_ZERO; continue;
4736 }
4737 break;
4738 }
4739 if (c == '*') {
4740 v = getnextarg(args, arglen, &argidx);
4741 if (v == NULL)
4742 goto error;
4743 if (!PyInt_Check(v)) {
4744 PyErr_SetString(PyExc_TypeError,
4745 "* wants int");
4746 goto error;
4747 }
4748 width = PyInt_AsLong(v);
4749 if (width < 0) {
4750 flags |= F_LJUST;
4751 width = -width;
4752 }
4753 if (--fmtcnt >= 0)
4754 c = *fmt++;
4755 }
4756 else if (c >= 0 && isdigit(c)) {
4757 width = c - '0';
4758 while (--fmtcnt >= 0) {
4759 c = Py_CHARMASK(*fmt++);
4760 if (!isdigit(c))
4761 break;
4762 if ((width*10) / 10 != width) {
4763 PyErr_SetString(
4764 PyExc_ValueError,
4765 "width too big");
4766 goto error;
4767 }
4768 width = width*10 + (c - '0');
4769 }
4770 }
4771 if (c == '.') {
4772 prec = 0;
4773 if (--fmtcnt >= 0)
4774 c = *fmt++;
4775 if (c == '*') {
4776 v = getnextarg(args, arglen, &argidx);
4777 if (v == NULL)
4778 goto error;
4779 if (!PyInt_Check(v)) {
4780 PyErr_SetString(
4781 PyExc_TypeError,
4782 "* wants int");
4783 goto error;
4784 }
4785 prec = PyInt_AsLong(v);
4786 if (prec < 0)
4787 prec = 0;
4788 if (--fmtcnt >= 0)
4789 c = *fmt++;
4790 }
4791 else if (c >= 0 && isdigit(c)) {
4792 prec = c - '0';
4793 while (--fmtcnt >= 0) {
4794 c = Py_CHARMASK(*fmt++);
4795 if (!isdigit(c))
4796 break;
4797 if ((prec*10) / 10 != prec) {
4798 PyErr_SetString(
4799 PyExc_ValueError,
4800 "prec too big");
4801 goto error;
4802 }
4803 prec = prec*10 + (c - '0');
4804 }
4805 }
4806 } /* prec */
4807 if (fmtcnt >= 0) {
4808 if (c == 'h' || c == 'l' || c == 'L') {
4809 if (--fmtcnt >= 0)
4810 c = *fmt++;
4811 }
4812 }
4813 if (fmtcnt < 0) {
4814 PyErr_SetString(PyExc_ValueError,
4815 "incomplete format");
4816 goto error;
4817 }
4818 if (c != '%') {
4819 v = getnextarg(args, arglen, &argidx);
4820 if (v == NULL)
4821 goto error;
4822 }
4823 sign = 0;
4824 fill = ' ';
4825 switch (c) {
4826 case '%':
4827 pbuf = "%";
4828 len = 1;
4829 break;
4830 case 's':
4831#ifdef Py_USING_UNICODE
4832 if (PyUnicode_Check(v)) {
4833 fmt = fmt_start;
4834 argidx = argidx_start;
4835 goto unicode;
4836 }
4837#endif
4838 temp = _PyObject_Str(v);
4839#ifdef Py_USING_UNICODE
4840 if (temp != NULL && PyUnicode_Check(temp)) {
4841 Py_DECREF(temp);
4842 fmt = fmt_start;
4843 argidx = argidx_start;
4844 goto unicode;
4845 }
4846#endif
4847 /* Fall through */
4848 case 'r':
4849 if (c == 'r')
4850 temp = PyObject_Repr(v);
4851 if (temp == NULL)
4852 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004853 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004854 PyErr_SetString(PyExc_TypeError,
4855 "%s argument has non-string str()");
4856 Py_DECREF(temp);
4857 goto error;
4858 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004859 pbuf = PyString_AS_STRING(temp);
4860 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004861 if (prec >= 0 && len > prec)
4862 len = prec;
4863 break;
4864 case 'i':
4865 case 'd':
4866 case 'u':
4867 case 'o':
4868 case 'x':
4869 case 'X':
4870 if (c == 'i')
4871 c = 'd';
4872 isnumok = 0;
4873 if (PyNumber_Check(v)) {
4874 PyObject *iobj=NULL;
4875
4876 if (PyInt_Check(v) || (PyLong_Check(v))) {
4877 iobj = v;
4878 Py_INCREF(iobj);
4879 }
4880 else {
4881 iobj = PyNumber_Int(v);
4882 if (iobj==NULL) iobj = PyNumber_Long(v);
4883 }
4884 if (iobj!=NULL) {
4885 if (PyInt_Check(iobj)) {
4886 isnumok = 1;
4887 pbuf = formatbuf;
4888 len = formatint(pbuf,
4889 sizeof(formatbuf),
4890 flags, prec, c, iobj);
4891 Py_DECREF(iobj);
4892 if (len < 0)
4893 goto error;
4894 sign = 1;
4895 }
4896 else if (PyLong_Check(iobj)) {
4897 int ilen;
4898
4899 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004900 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004901 prec, c, &pbuf, &ilen);
4902 Py_DECREF(iobj);
4903 len = ilen;
4904 if (!temp)
4905 goto error;
4906 sign = 1;
4907 }
4908 else {
4909 Py_DECREF(iobj);
4910 }
4911 }
4912 }
4913 if (!isnumok) {
4914 PyErr_Format(PyExc_TypeError,
4915 "%%%c format: a number is required, "
4916 "not %.200s", c, Py_TYPE(v)->tp_name);
4917 goto error;
4918 }
4919 if (flags & F_ZERO)
4920 fill = '0';
4921 break;
4922 case 'e':
4923 case 'E':
4924 case 'f':
4925 case 'F':
4926 case 'g':
4927 case 'G':
Eric Smithd6c393a2008-07-17 19:49:47 +00004928 if (c == 'F')
4929 c = 'f';
Christian Heimes44720832008-05-26 13:01:01 +00004930 pbuf = formatbuf;
4931 len = formatfloat(pbuf, sizeof(formatbuf),
4932 flags, prec, c, v);
4933 if (len < 0)
4934 goto error;
4935 sign = 1;
4936 if (flags & F_ZERO)
4937 fill = '0';
4938 break;
4939 case 'c':
4940#ifdef Py_USING_UNICODE
4941 if (PyUnicode_Check(v)) {
4942 fmt = fmt_start;
4943 argidx = argidx_start;
4944 goto unicode;
4945 }
4946#endif
4947 pbuf = formatbuf;
4948 len = formatchar(pbuf, sizeof(formatbuf), v);
4949 if (len < 0)
4950 goto error;
4951 break;
4952 default:
4953 PyErr_Format(PyExc_ValueError,
4954 "unsupported format character '%c' (0x%x) "
4955 "at index %zd",
4956 c, c,
4957 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004958 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00004959 goto error;
4960 }
4961 if (sign) {
4962 if (*pbuf == '-' || *pbuf == '+') {
4963 sign = *pbuf++;
4964 len--;
4965 }
4966 else if (flags & F_SIGN)
4967 sign = '+';
4968 else if (flags & F_BLANK)
4969 sign = ' ';
4970 else
4971 sign = 0;
4972 }
4973 if (width < len)
4974 width = len;
4975 if (rescnt - (sign != 0) < width) {
4976 reslen -= rescnt;
4977 rescnt = width + fmtcnt + 100;
4978 reslen += rescnt;
4979 if (reslen < 0) {
4980 Py_DECREF(result);
4981 Py_XDECREF(temp);
4982 return PyErr_NoMemory();
4983 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004984 if (_PyString_Resize(&result, reslen) < 0) {
Christian Heimes44720832008-05-26 13:01:01 +00004985 Py_XDECREF(temp);
4986 return NULL;
4987 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004988 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004989 + reslen - rescnt;
4990 }
4991 if (sign) {
4992 if (fill != ' ')
4993 *res++ = sign;
4994 rescnt--;
4995 if (width > len)
4996 width--;
4997 }
4998 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4999 assert(pbuf[0] == '0');
5000 assert(pbuf[1] == c);
5001 if (fill != ' ') {
5002 *res++ = *pbuf++;
5003 *res++ = *pbuf++;
5004 }
5005 rescnt -= 2;
5006 width -= 2;
5007 if (width < 0)
5008 width = 0;
5009 len -= 2;
5010 }
5011 if (width > len && !(flags & F_LJUST)) {
5012 do {
5013 --rescnt;
5014 *res++ = fill;
5015 } while (--width > len);
5016 }
5017 if (fill == ' ') {
5018 if (sign)
5019 *res++ = sign;
5020 if ((flags & F_ALT) &&
5021 (c == 'x' || c == 'X')) {
5022 assert(pbuf[0] == '0');
5023 assert(pbuf[1] == c);
5024 *res++ = *pbuf++;
5025 *res++ = *pbuf++;
5026 }
5027 }
5028 Py_MEMCPY(res, pbuf, len);
5029 res += len;
5030 rescnt -= len;
5031 while (--width >= len) {
5032 --rescnt;
5033 *res++ = ' ';
5034 }
5035 if (dict && (argidx < arglen) && c != '%') {
5036 PyErr_SetString(PyExc_TypeError,
5037 "not all arguments converted during string formatting");
5038 Py_XDECREF(temp);
5039 goto error;
5040 }
5041 Py_XDECREF(temp);
5042 } /* '%' */
5043 } /* until end */
5044 if (argidx < arglen && !dict) {
5045 PyErr_SetString(PyExc_TypeError,
5046 "not all arguments converted during string formatting");
5047 goto error;
5048 }
5049 if (args_owned) {
5050 Py_DECREF(args);
5051 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005052 _PyString_Resize(&result, reslen - rescnt);
Christian Heimes44720832008-05-26 13:01:01 +00005053 return result;
5054
5055#ifdef Py_USING_UNICODE
5056 unicode:
5057 if (args_owned) {
5058 Py_DECREF(args);
5059 args_owned = 0;
5060 }
5061 /* Fiddle args right (remove the first argidx arguments) */
5062 if (PyTuple_Check(orig_args) && argidx > 0) {
5063 PyObject *v;
5064 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5065 v = PyTuple_New(n);
5066 if (v == NULL)
5067 goto error;
5068 while (--n >= 0) {
5069 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5070 Py_INCREF(w);
5071 PyTuple_SET_ITEM(v, n, w);
5072 }
5073 args = v;
5074 } else {
5075 Py_INCREF(orig_args);
5076 args = orig_args;
5077 }
5078 args_owned = 1;
5079 /* Take what we have of the result and let the Unicode formatting
5080 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005081 rescnt = res - PyString_AS_STRING(result);
5082 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00005083 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005084 fmtcnt = PyString_GET_SIZE(format) - \
5085 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00005086 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5087 if (format == NULL)
5088 goto error;
5089 v = PyUnicode_Format(format, args);
5090 Py_DECREF(format);
5091 if (v == NULL)
5092 goto error;
5093 /* Paste what we have (result) to what the Unicode formatting
5094 function returned (v) and return the result (or error) */
5095 w = PyUnicode_Concat(result, v);
5096 Py_DECREF(result);
5097 Py_DECREF(v);
5098 Py_DECREF(args);
5099 return w;
5100#endif /* Py_USING_UNICODE */
5101
5102 error:
5103 Py_DECREF(result);
5104 if (args_owned) {
5105 Py_DECREF(args);
5106 }
5107 return NULL;
5108}
5109
5110void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005111PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005112{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005113 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00005114 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005115 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005116 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00005117 /* If it's a string subclass, we don't really know what putting
5118 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005119 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00005120 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005121 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00005122 return;
5123 if (interned == NULL) {
5124 interned = PyDict_New();
5125 if (interned == NULL) {
5126 PyErr_Clear(); /* Don't leave an exception */
5127 return;
5128 }
5129 }
5130 t = PyDict_GetItem(interned, (PyObject *)s);
5131 if (t) {
5132 Py_INCREF(t);
5133 Py_DECREF(*p);
5134 *p = t;
5135 return;
5136 }
5137
5138 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5139 PyErr_Clear();
5140 return;
5141 }
5142 /* The two references in interned are not counted by refcnt.
5143 The string deallocator will take care of this */
5144 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005145 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005146}
5147
5148void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005149PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005150{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005151 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005152 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5153 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005154 Py_INCREF(*p);
5155 }
5156}
5157
5158
5159PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005160PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005161{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005162 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00005163 if (s == NULL)
5164 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005165 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00005166 return s;
5167}
5168
5169void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005170PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005171{
5172 int i;
5173 for (i = 0; i < UCHAR_MAX + 1; i++) {
5174 Py_XDECREF(characters[i]);
5175 characters[i] = NULL;
5176 }
5177 Py_XDECREF(nullstring);
5178 nullstring = NULL;
5179}
5180
5181void _Py_ReleaseInternedStrings(void)
5182{
5183 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005184 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00005185 Py_ssize_t i, n;
5186 Py_ssize_t immortal_size = 0, mortal_size = 0;
5187
5188 if (interned == NULL || !PyDict_Check(interned))
5189 return;
5190 keys = PyDict_Keys(interned);
5191 if (keys == NULL || !PyList_Check(keys)) {
5192 PyErr_Clear();
5193 return;
5194 }
5195
5196 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5197 detector, interned strings are not forcibly deallocated; rather, we
5198 give them their stolen references back, and then clear and DECREF
5199 the interned dict. */
5200
5201 n = PyList_GET_SIZE(keys);
5202 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5203 n);
5204 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005205 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00005206 switch (s->ob_sstate) {
5207 case SSTATE_NOT_INTERNED:
5208 /* XXX Shouldn't happen */
5209 break;
5210 case SSTATE_INTERNED_IMMORTAL:
5211 Py_REFCNT(s) += 1;
5212 immortal_size += Py_SIZE(s);
5213 break;
5214 case SSTATE_INTERNED_MORTAL:
5215 Py_REFCNT(s) += 2;
5216 mortal_size += Py_SIZE(s);
5217 break;
5218 default:
5219 Py_FatalError("Inconsistent interned string state.");
5220 }
5221 s->ob_sstate = SSTATE_NOT_INTERNED;
5222 }
5223 fprintf(stderr, "total size of all interned strings: "
5224 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5225 "mortal/immortal\n", mortal_size, immortal_size);
5226 Py_DECREF(keys);
5227 PyDict_Clear(interned);
5228 Py_DECREF(interned);
5229 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005230}