blob: abb1d3a0ee6017c687460250f9bb62a815a5dc0e [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For both PyString_FromString() and PyString_FromStringAndSize(), the
Christian Heimes44720832008-05-26 13:01:01 +000036 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
38
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000039 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000040 string containing exactly `size' bytes.
41
   For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000043 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000044 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000045 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000046 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000048 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000059*/
60PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000061PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000063 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000064 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000066 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000067 return NULL;
68 }
69 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
Mark Dickinson826f3fe2008-12-05 21:55:28 +000086 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +000087 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
89 }
90
Christian Heimes44720832008-05-26 13:01:01 +000091 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +000092 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +000093 if (op == NULL)
94 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000095 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000096 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000104 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000105 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000110 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000111 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
114 }
115 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116}
117
Christian Heimes44720832008-05-26 13:01:01 +0000118PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000119PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000120{
Christian Heimes44720832008-05-26 13:01:01 +0000121 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000122 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000123
124 assert(str != NULL);
125 size = strlen(str);
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Christian Heimes44720832008-05-26 13:01:01 +0000127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
131 if (size == 0 && (op = nullstring) != NULL) {
132#ifdef COUNT_ALLOCS
133 null_strings++;
134#endif
135 Py_INCREF(op);
136 return (PyObject *)op;
137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139#ifdef COUNT_ALLOCS
140 one_strings++;
141#endif
142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145
146 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +0000148 if (op == NULL)
149 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000150 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000157 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000158 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000163 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000164 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169}
170
Christian Heimes44720832008-05-26 13:01:01 +0000171PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000172PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000173{
Christian Heimes44720832008-05-26 13:01:01 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000179
Christian Heimes44720832008-05-26 13:01:01 +0000180#ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count, vargs, sizeof(va_list));
182#else
183#ifdef __va_copy
184 __va_copy(count, vargs);
185#else
186 count = vargs;
187#endif
188#endif
189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000192#ifdef HAVE_LONG_LONG
193 int longlongflag = 0;
194#endif
Christian Heimes44720832008-05-26 13:01:01 +0000195 const char* p = f;
196 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
197 ;
198
199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
201 */
Mark Dickinson82864d12009-11-15 16:18:58 +0000202 if (*f == 'l') {
203 if (f[1] == 'd' || f[1] == 'u') {
204 ++f;
205 }
206#ifdef HAVE_LONG_LONG
207 else if (f[1] == 'l' &&
208 (f[2] == 'd' || f[2] == 'u')) {
209 longlongflag = 1;
210 f += 2;
211 }
212#endif
213 }
214 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Christian Heimes44720832008-05-26 13:01:01 +0000215 ++f;
Mark Dickinson82864d12009-11-15 16:18:58 +0000216 }
Christian Heimes44720832008-05-26 13:01:01 +0000217
218 switch (*f) {
219 case 'c':
220 (void)va_arg(count, int);
221 /* fall through... */
222 case '%':
223 n++;
224 break;
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000227#ifdef HAVE_LONG_LONG
228 /* Need at most
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
232 if (longlongflag)
233 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
234 else
235#endif
236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
239 octal. */
240 n += 20;
241
Christian Heimes44720832008-05-26 13:01:01 +0000242 break;
243 case 's':
244 s = va_arg(count, char*);
245 n += strlen(s);
246 break;
247 case 'p':
248 (void) va_arg(count, int);
249 /* maximum 64-bit pointer representation:
250 * 0xffffffffffffffff
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
253 */
254 n += 19;
255 break;
256 default:
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
263 n += strlen(p);
264 goto expand;
265 }
266 } else
267 n++;
268 }
269 expand:
270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000273 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000274 if (!string)
275 return NULL;
276
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000277 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000278
279 for (f = format; *f; f++) {
280 if (*f == '%') {
281 const char* p = f++;
282 Py_ssize_t i;
283 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000284#ifdef HAVE_LONG_LONG
285 int longlongflag = 0;
286#endif
Christian Heimes44720832008-05-26 13:01:01 +0000287 int size_tflag = 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
290 n = 0;
291 while (isdigit(Py_CHARMASK(*f)))
292 n = (n*10) + *f++ - '0';
293 if (*f == '.') {
294 f++;
295 n = 0;
296 while (isdigit(Py_CHARMASK(*f)))
297 n = (n*10) + *f++ - '0';
298 }
299 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
300 f++;
Mark Dickinson82864d12009-11-15 16:18:58 +0000301 /* Handle %ld, %lu, %lld and %llu. */
302 if (*f == 'l') {
303 if (f[1] == 'd' || f[1] == 'u') {
304 longflag = 1;
305 ++f;
306 }
307#ifdef HAVE_LONG_LONG
308 else if (f[1] == 'l' &&
309 (f[2] == 'd' || f[2] == 'u')) {
310 longlongflag = 1;
311 f += 2;
312 }
313#endif
Christian Heimes44720832008-05-26 13:01:01 +0000314 }
315 /* handle the size_t flag. */
Mark Dickinson82864d12009-11-15 16:18:58 +0000316 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Christian Heimes44720832008-05-26 13:01:01 +0000317 size_tflag = 1;
318 ++f;
319 }
320
321 switch (*f) {
322 case 'c':
323 *s++ = va_arg(vargs, int);
324 break;
325 case 'd':
326 if (longflag)
327 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#ifdef HAVE_LONG_LONG
329 else if (longlongflag)
330 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
331 va_arg(vargs, PY_LONG_LONG));
332#endif
Christian Heimes44720832008-05-26 13:01:01 +0000333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
335 va_arg(vargs, Py_ssize_t));
336 else
337 sprintf(s, "%d", va_arg(vargs, int));
338 s += strlen(s);
339 break;
340 case 'u':
341 if (longflag)
342 sprintf(s, "%lu",
343 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#ifdef HAVE_LONG_LONG
345 else if (longlongflag)
346 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
347 va_arg(vargs, PY_LONG_LONG));
348#endif
Christian Heimes44720832008-05-26 13:01:01 +0000349 else if (size_tflag)
350 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
351 va_arg(vargs, size_t));
352 else
353 sprintf(s, "%u",
354 va_arg(vargs, unsigned int));
355 s += strlen(s);
356 break;
357 case 'i':
358 sprintf(s, "%i", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 'x':
362 sprintf(s, "%x", va_arg(vargs, int));
363 s += strlen(s);
364 break;
365 case 's':
366 p = va_arg(vargs, char*);
367 i = strlen(p);
368 if (n > 0 && i > n)
369 i = n;
370 Py_MEMCPY(s, p, i);
371 s += i;
372 break;
373 case 'p':
374 sprintf(s, "%p", va_arg(vargs, void*));
375 /* %p is ill-defined: ensure leading 0x. */
376 if (s[1] == 'X')
377 s[1] = 'x';
378 else if (s[1] != 'x') {
379 memmove(s+2, s, strlen(s)+1);
380 s[0] = '0';
381 s[1] = 'x';
382 }
383 s += strlen(s);
384 break;
385 case '%':
386 *s++ = '%';
387 break;
388 default:
389 strcpy(s, p);
390 s += strlen(s);
391 goto end;
392 }
393 } else
394 *s++ = *f;
395 }
396
397 end:
Benjamin Peterson6caf7ff2010-04-02 23:59:41 +0000398 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
399 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000400 return string;
401}
402
403PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000404PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000405{
406 PyObject* ret;
407 va_list vargs;
408
409#ifdef HAVE_STDARG_PROTOTYPES
410 va_start(vargs, format);
411#else
412 va_start(vargs);
413#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000414 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000415 va_end(vargs);
416 return ret;
417}
418
419
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000420PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000421 Py_ssize_t size,
422 const char *encoding,
423 const char *errors)
424{
425 PyObject *v, *str;
426
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000427 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000428 if (str == NULL)
429 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000430 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000431 Py_DECREF(str);
432 return v;
433}
434
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000435PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000436 const char *encoding,
437 const char *errors)
438{
439 PyObject *v;
440
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000441 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000442 PyErr_BadArgument();
443 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000444 }
445
Christian Heimes44720832008-05-26 13:01:01 +0000446 if (encoding == NULL) {
447#ifdef Py_USING_UNICODE
448 encoding = PyUnicode_GetDefaultEncoding();
449#else
450 PyErr_SetString(PyExc_ValueError, "no encoding specified");
451 goto onError;
452#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000453 }
Christian Heimes44720832008-05-26 13:01:01 +0000454
455 /* Decode via the codec registry */
456 v = PyCodec_Decode(str, encoding, errors);
457 if (v == NULL)
458 goto onError;
459
460 return v;
461
462 onError:
463 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000464}
465
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000466PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000467 const char *encoding,
468 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000469{
Christian Heimes44720832008-05-26 13:01:01 +0000470 PyObject *v;
471
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000472 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000473 if (v == NULL)
474 goto onError;
475
476#ifdef Py_USING_UNICODE
477 /* Convert Unicode to a string using the default encoding */
478 if (PyUnicode_Check(v)) {
479 PyObject *temp = v;
480 v = PyUnicode_AsEncodedString(v, NULL, NULL);
481 Py_DECREF(temp);
482 if (v == NULL)
483 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000484 }
Christian Heimes44720832008-05-26 13:01:01 +0000485#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000486 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000487 PyErr_Format(PyExc_TypeError,
488 "decoder did not return a string object (type=%.400s)",
489 Py_TYPE(v)->tp_name);
490 Py_DECREF(v);
491 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000492 }
Christian Heimes44720832008-05-26 13:01:01 +0000493
494 return v;
495
496 onError:
497 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000498}
499
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000500PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000501 Py_ssize_t size,
502 const char *encoding,
503 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000504{
Christian Heimes44720832008-05-26 13:01:01 +0000505 PyObject *v, *str;
506
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000507 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000508 if (str == NULL)
509 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000510 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000511 Py_DECREF(str);
512 return v;
513}
514
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000515PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000516 const char *encoding,
517 const char *errors)
518{
519 PyObject *v;
520
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000521 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000522 PyErr_BadArgument();
523 goto onError;
524 }
525
526 if (encoding == NULL) {
527#ifdef Py_USING_UNICODE
528 encoding = PyUnicode_GetDefaultEncoding();
529#else
530 PyErr_SetString(PyExc_ValueError, "no encoding specified");
531 goto onError;
532#endif
533 }
534
535 /* Encode via the codec registry */
536 v = PyCodec_Encode(str, encoding, errors);
537 if (v == NULL)
538 goto onError;
539
540 return v;
541
542 onError:
543 return NULL;
544}
545
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000546PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000547 const char *encoding,
548 const char *errors)
549{
550 PyObject *v;
551
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000552 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000553 if (v == NULL)
554 goto onError;
555
556#ifdef Py_USING_UNICODE
557 /* Convert Unicode to a string using the default encoding */
558 if (PyUnicode_Check(v)) {
559 PyObject *temp = v;
560 v = PyUnicode_AsEncodedString(v, NULL, NULL);
561 Py_DECREF(temp);
562 if (v == NULL)
563 goto onError;
564 }
565#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000566 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000567 PyErr_Format(PyExc_TypeError,
568 "encoder did not return a string object (type=%.400s)",
569 Py_TYPE(v)->tp_name);
570 Py_DECREF(v);
571 goto onError;
572 }
573
574 return v;
575
576 onError:
577 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578}
579
580static void
Christian Heimes44720832008-05-26 13:01:01 +0000581string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000582{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000583 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000584 case SSTATE_NOT_INTERNED:
585 break;
586
587 case SSTATE_INTERNED_MORTAL:
588 /* revive dead object temporarily for DelItem */
589 Py_REFCNT(op) = 3;
590 if (PyDict_DelItem(interned, op) != 0)
591 Py_FatalError(
592 "deletion of interned string failed");
593 break;
594
595 case SSTATE_INTERNED_IMMORTAL:
596 Py_FatalError("Immortal interned string died.");
597
598 default:
599 Py_FatalError("Inconsistent interned string state.");
600 }
601 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000602}
603
Christian Heimes44720832008-05-26 13:01:01 +0000604/* Unescape a backslash-escaped string. If unicode is non-zero,
605 the string is a u-literal. If recode_encoding is non-zero,
606 the string is UTF-8 encoded and should be re-encoded in the
607 specified encoding. */
608
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000609PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000610 Py_ssize_t len,
611 const char *errors,
612 Py_ssize_t unicode,
613 const char *recode_encoding)
614{
615 int c;
616 char *p, *buf;
617 const char *end;
618 PyObject *v;
619 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000620 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000621 if (v == NULL)
622 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000623 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000624 end = s + len;
625 while (s < end) {
626 if (*s != '\\') {
627 non_esc:
628#ifdef Py_USING_UNICODE
629 if (recode_encoding && (*s & 0x80)) {
630 PyObject *u, *w;
631 char *r;
632 const char* t;
633 Py_ssize_t rn;
634 t = s;
635 /* Decode non-ASCII bytes as UTF-8. */
636 while (t < end && (*t & 0x80)) t++;
637 u = PyUnicode_DecodeUTF8(s, t - s, errors);
638 if(!u) goto failed;
639
640 /* Recode them in target encoding. */
641 w = PyUnicode_AsEncodedString(
642 u, recode_encoding, errors);
643 Py_DECREF(u);
644 if (!w) goto failed;
645
646 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000647 assert(PyString_Check(w));
648 r = PyString_AS_STRING(w);
649 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000650 Py_MEMCPY(p, r, rn);
651 p += rn;
652 Py_DECREF(w);
653 s = t;
654 } else {
655 *p++ = *s++;
656 }
657#else
658 *p++ = *s++;
659#endif
660 continue;
661 }
662 s++;
663 if (s==end) {
664 PyErr_SetString(PyExc_ValueError,
665 "Trailing \\ in string");
666 goto failed;
667 }
668 switch (*s++) {
669 /* XXX This assumes ASCII! */
670 case '\n': break;
671 case '\\': *p++ = '\\'; break;
672 case '\'': *p++ = '\''; break;
673 case '\"': *p++ = '\"'; break;
674 case 'b': *p++ = '\b'; break;
675 case 'f': *p++ = '\014'; break; /* FF */
676 case 't': *p++ = '\t'; break;
677 case 'n': *p++ = '\n'; break;
678 case 'r': *p++ = '\r'; break;
679 case 'v': *p++ = '\013'; break; /* VT */
680 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
681 case '0': case '1': case '2': case '3':
682 case '4': case '5': case '6': case '7':
683 c = s[-1] - '0';
684 if (s < end && '0' <= *s && *s <= '7') {
685 c = (c<<3) + *s++ - '0';
686 if (s < end && '0' <= *s && *s <= '7')
687 c = (c<<3) + *s++ - '0';
688 }
689 *p++ = c;
690 break;
691 case 'x':
692 if (s+1 < end &&
693 isxdigit(Py_CHARMASK(s[0])) &&
694 isxdigit(Py_CHARMASK(s[1])))
695 {
696 unsigned int x = 0;
697 c = Py_CHARMASK(*s);
698 s++;
699 if (isdigit(c))
700 x = c - '0';
701 else if (islower(c))
702 x = 10 + c - 'a';
703 else
704 x = 10 + c - 'A';
705 x = x << 4;
706 c = Py_CHARMASK(*s);
707 s++;
708 if (isdigit(c))
709 x += c - '0';
710 else if (islower(c))
711 x += 10 + c - 'a';
712 else
713 x += 10 + c - 'A';
714 *p++ = x;
715 break;
716 }
717 if (!errors || strcmp(errors, "strict") == 0) {
718 PyErr_SetString(PyExc_ValueError,
719 "invalid \\x escape");
720 goto failed;
721 }
722 if (strcmp(errors, "replace") == 0) {
723 *p++ = '?';
724 } else if (strcmp(errors, "ignore") == 0)
725 /* do nothing */;
726 else {
727 PyErr_Format(PyExc_ValueError,
728 "decoding error; "
729 "unknown error handling code: %.400s",
730 errors);
731 goto failed;
732 }
733#ifndef Py_USING_UNICODE
734 case 'u':
735 case 'U':
736 case 'N':
737 if (unicode) {
738 PyErr_SetString(PyExc_ValueError,
739 "Unicode escapes not legal "
740 "when Unicode disabled");
741 goto failed;
742 }
743#endif
744 default:
745 *p++ = '\\';
746 s--;
747 goto non_esc; /* an arbitry number of unescaped
748 UTF-8 bytes may follow. */
749 }
750 }
Benjamin Peterson6caf7ff2010-04-02 23:59:41 +0000751 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
752 goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000753 return v;
754 failed:
755 Py_DECREF(v);
756 return NULL;
757}
758
759/* -------------------------------------------------------------------- */
760/* object api */
761
Christian Heimes1a6387e2008-03-26 12:49:49 +0000762static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000763string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000764{
Christian Heimes44720832008-05-26 13:01:01 +0000765 char *s;
766 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000767 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000768 return -1;
769 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770}
771
Christian Heimes44720832008-05-26 13:01:01 +0000772static /*const*/ char *
773string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000774{
Christian Heimes44720832008-05-26 13:01:01 +0000775 char *s;
776 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000777 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000778 return NULL;
779 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780}
781
782Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000783PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000785 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000786 return string_getsize(op);
787 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788}
789
Christian Heimes44720832008-05-26 13:01:01 +0000790/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000791PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000793 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000794 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796}
797
798int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000799PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000800 register char **s,
801 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000802{
Christian Heimes44720832008-05-26 13:01:01 +0000803 if (s == NULL) {
804 PyErr_BadInternalCall();
805 return -1;
806 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000807
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000808 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000809#ifdef Py_USING_UNICODE
810 if (PyUnicode_Check(obj)) {
811 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812 if (obj == NULL)
813 return -1;
814 }
815 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000816#endif
Christian Heimes44720832008-05-26 13:01:01 +0000817 {
818 PyErr_Format(PyExc_TypeError,
819 "expected string or Unicode object, "
820 "%.200s found", Py_TYPE(obj)->tp_name);
821 return -1;
822 }
823 }
824
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000825 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000826 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000827 *len = PyString_GET_SIZE(obj);
828 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000829 PyErr_SetString(PyExc_TypeError,
830 "expected string without null bytes");
831 return -1;
832 }
833 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000834}
835
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836/* -------------------------------------------------------------------- */
837/* Methods */
838
Christian Heimes44720832008-05-26 13:01:01 +0000839#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000840#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000841
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842#include "stringlib/count.h"
843#include "stringlib/find.h"
844#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000845#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000847#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000848#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000849
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850
851
852static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000853string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854{
Christian Heimes44720832008-05-26 13:01:01 +0000855 Py_ssize_t i, str_len;
856 char c;
857 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000858
Christian Heimes44720832008-05-26 13:01:01 +0000859 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000860 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000861 int ret;
862 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000863 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000864 if (op == NULL)
865 return -1;
866 ret = string_print(op, fp, flags);
867 Py_DECREF(op);
868 return ret;
869 }
870 if (flags & Py_PRINT_RAW) {
871 char *data = op->ob_sval;
872 Py_ssize_t size = Py_SIZE(op);
873 Py_BEGIN_ALLOW_THREADS
874 while (size > INT_MAX) {
875 /* Very long strings cannot be written atomically.
876 * But don't write exactly INT_MAX bytes at a time
877 * to avoid memory aligment issues.
878 */
879 const int chunk_size = INT_MAX & ~0x3FFF;
880 fwrite(data, 1, chunk_size, fp);
881 data += chunk_size;
882 size -= chunk_size;
883 }
884#ifdef __VMS
885 if (size) fwrite(data, (int)size, 1, fp);
886#else
887 fwrite(data, 1, (int)size, fp);
888#endif
889 Py_END_ALLOW_THREADS
890 return 0;
891 }
892
893 /* figure out which quote to use; single is preferred */
894 quote = '\'';
895 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896 !memchr(op->ob_sval, '"', Py_SIZE(op)))
897 quote = '"';
898
899 str_len = Py_SIZE(op);
900 Py_BEGIN_ALLOW_THREADS
901 fputc(quote, fp);
902 for (i = 0; i < str_len; i++) {
903 /* Since strings are immutable and the caller should have a
904 reference, accessing the interal buffer should not be an issue
905 with the GIL released. */
906 c = op->ob_sval[i];
907 if (c == quote || c == '\\')
908 fprintf(fp, "\\%c", c);
909 else if (c == '\t')
910 fprintf(fp, "\\t");
911 else if (c == '\n')
912 fprintf(fp, "\\n");
913 else if (c == '\r')
914 fprintf(fp, "\\r");
915 else if (c < ' ' || c >= 0x7f)
916 fprintf(fp, "\\x%02x", c & 0xff);
917 else
918 fputc(c, fp);
919 }
920 fputc(quote, fp);
921 Py_END_ALLOW_THREADS
922 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923}
924
Christian Heimes44720832008-05-26 13:01:01 +0000925PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000926PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000927{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000928 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000929 size_t newsize = 2 + 4 * Py_SIZE(op);
930 PyObject *v;
931 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
932 PyErr_SetString(PyExc_OverflowError,
933 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000934 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000935 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000936 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000937 if (v == NULL) {
938 return NULL;
939 }
940 else {
941 register Py_ssize_t i;
942 register char c;
943 register char *p;
944 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000945
Christian Heimes44720832008-05-26 13:01:01 +0000946 /* figure out which quote to use; single is preferred */
947 quote = '\'';
948 if (smartquotes &&
949 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
950 !memchr(op->ob_sval, '"', Py_SIZE(op)))
951 quote = '"';
952
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000953 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000954 *p++ = quote;
955 for (i = 0; i < Py_SIZE(op); i++) {
956 /* There's at least enough room for a hex escape
957 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000958 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000959 c = op->ob_sval[i];
960 if (c == quote || c == '\\')
961 *p++ = '\\', *p++ = c;
962 else if (c == '\t')
963 *p++ = '\\', *p++ = 't';
964 else if (c == '\n')
965 *p++ = '\\', *p++ = 'n';
966 else if (c == '\r')
967 *p++ = '\\', *p++ = 'r';
968 else if (c < ' ' || c >= 0x7f) {
969 /* For performance, we don't want to call
970 PyOS_snprintf here (extra layers of
971 function call). */
972 sprintf(p, "\\x%02x", c & 0xff);
973 p += 4;
974 }
975 else
976 *p++ = c;
977 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000978 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000979 *p++ = quote;
980 *p = '\0';
Benjamin Peterson6caf7ff2010-04-02 23:59:41 +0000981 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
982 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000983 return v;
984 }
985}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000986
987static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000988string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000990 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991}
992
Christian Heimes1a6387e2008-03-26 12:49:49 +0000993static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000994string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000995{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000996 assert(PyString_Check(s));
997 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000998 Py_INCREF(s);
999 return s;
1000 }
1001 else {
1002 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001003 PyStringObject *t = (PyStringObject *) s;
1004 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +00001005 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001006}
1007
Christian Heimes44720832008-05-26 13:01:01 +00001008static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001009string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001010{
1011 return Py_SIZE(a);
1012}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001013
Christian Heimes44720832008-05-26 13:01:01 +00001014static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001015string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001016{
1017 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001018 register PyStringObject *op;
1019 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001020#ifdef Py_USING_UNICODE
1021 if (PyUnicode_Check(bb))
1022 return PyUnicode_Concat((PyObject *)a, bb);
1023#endif
1024 if (PyByteArray_Check(bb))
1025 return PyByteArray_Concat((PyObject *)a, bb);
1026 PyErr_Format(PyExc_TypeError,
1027 "cannot concatenate 'str' and '%.200s' objects",
1028 Py_TYPE(bb)->tp_name);
1029 return NULL;
1030 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001031#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +00001032 /* Optimize cases with empty left or right operand */
1033 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001034 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001035 if (Py_SIZE(a) == 0) {
1036 Py_INCREF(bb);
1037 return bb;
1038 }
1039 Py_INCREF(a);
1040 return (PyObject *)a;
1041 }
1042 size = Py_SIZE(a) + Py_SIZE(b);
Neal Norwitze7d8be82008-07-31 17:17:14 +00001043 /* Check that string sizes are not negative, to prevent an
1044 overflow in cases where we are passed incorrectly-created
1045 strings with negative lengths (due to a bug in other code).
1046 */
1047 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001049 PyErr_SetString(PyExc_OverflowError,
1050 "strings are too large to concat");
1051 return NULL;
1052 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053
Christian Heimes44720832008-05-26 13:01:01 +00001054 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001055 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +00001056 PyErr_SetString(PyExc_OverflowError,
1057 "strings are too large to concat");
1058 return NULL;
1059 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001060 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +00001061 if (op == NULL)
1062 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001063 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001064 op->ob_shash = -1;
1065 op->ob_sstate = SSTATE_NOT_INTERNED;
1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1068 op->ob_sval[size] = '\0';
1069 return (PyObject *) op;
1070#undef b
1071}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001072
Christian Heimes44720832008-05-26 13:01:01 +00001073static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001074string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001075{
1076 register Py_ssize_t i;
1077 register Py_ssize_t j;
1078 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001079 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001080 size_t nbytes;
1081 if (n < 0)
1082 n = 0;
1083 /* watch out for overflows: the size can overflow int,
1084 * and the # of bytes needed can overflow size_t
1085 */
1086 size = Py_SIZE(a) * n;
1087 if (n && size / n != Py_SIZE(a)) {
1088 PyErr_SetString(PyExc_OverflowError,
1089 "repeated string is too long");
1090 return NULL;
1091 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001092 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001093 Py_INCREF(a);
1094 return (PyObject *)a;
1095 }
1096 nbytes = (size_t)size;
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001097 if (nbytes + PyStringObject_SIZE <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001098 PyErr_SetString(PyExc_OverflowError,
1099 "repeated string is too long");
1100 return NULL;
1101 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001102 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001103 if (op == NULL)
1104 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001105 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001106 op->ob_shash = -1;
1107 op->ob_sstate = SSTATE_NOT_INTERNED;
1108 op->ob_sval[size] = '\0';
1109 if (Py_SIZE(a) == 1 && n > 0) {
1110 memset(op->ob_sval, a->ob_sval[0] , n);
1111 return (PyObject *) op;
1112 }
1113 i = 0;
1114 if (i < size) {
1115 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1116 i = Py_SIZE(a);
1117 }
1118 while (i < size) {
1119 j = (i <= size-i) ? i : size-i;
1120 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1121 i += j;
1122 }
1123 return (PyObject *) op;
1124}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001125
Christian Heimes44720832008-05-26 13:01:01 +00001126/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127
1128static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001129string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001130 register Py_ssize_t j)
1131 /* j -- may be negative! */
1132{
1133 if (i < 0)
1134 i = 0;
1135 if (j < 0)
1136 j = 0; /* Avoid signed/unsigned bug in next line */
1137 if (j > Py_SIZE(a))
1138 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001139 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001140 /* It's the same as a */
1141 Py_INCREF(a);
1142 return (PyObject *)a;
1143 }
1144 if (j < i)
1145 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001146 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001147}
1148
1149static int
1150string_contains(PyObject *str_obj, PyObject *sub_obj)
1151{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001152 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001153#ifdef Py_USING_UNICODE
1154 if (PyUnicode_Check(sub_obj))
1155 return PyUnicode_Contains(str_obj, sub_obj);
1156#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001157 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001158 PyErr_Format(PyExc_TypeError,
1159 "'in <string>' requires string as left operand, "
1160 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1161 return -1;
1162 }
1163 }
1164
1165 return stringlib_contains_obj(str_obj, sub_obj);
1166}
1167
1168static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001169string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001170{
1171 char pchar;
1172 PyObject *v;
1173 if (i < 0 || i >= Py_SIZE(a)) {
1174 PyErr_SetString(PyExc_IndexError, "string index out of range");
1175 return NULL;
1176 }
1177 pchar = a->ob_sval[i];
1178 v = (PyObject *)characters[pchar & UCHAR_MAX];
1179 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001180 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001181 else {
1182#ifdef COUNT_ALLOCS
1183 one_strings++;
1184#endif
1185 Py_INCREF(v);
1186 }
1187 return v;
1188}
1189
1190static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001191string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001192{
1193 int c;
1194 Py_ssize_t len_a, len_b;
1195 Py_ssize_t min_len;
1196 PyObject *result;
1197
1198 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001199 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001200 result = Py_NotImplemented;
1201 goto out;
1202 }
1203 if (a == b) {
1204 switch (op) {
1205 case Py_EQ:case Py_LE:case Py_GE:
1206 result = Py_True;
1207 goto out;
1208 case Py_NE:case Py_LT:case Py_GT:
1209 result = Py_False;
1210 goto out;
1211 }
1212 }
1213 if (op == Py_EQ) {
1214 /* Supporting Py_NE here as well does not save
1215 much time, since Py_NE is rarely used. */
1216 if (Py_SIZE(a) == Py_SIZE(b)
1217 && (a->ob_sval[0] == b->ob_sval[0]
1218 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1219 result = Py_True;
1220 } else {
1221 result = Py_False;
1222 }
1223 goto out;
1224 }
1225 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1226 min_len = (len_a < len_b) ? len_a : len_b;
1227 if (min_len > 0) {
1228 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1229 if (c==0)
1230 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1231 } else
1232 c = 0;
1233 if (c == 0)
1234 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1235 switch (op) {
1236 case Py_LT: c = c < 0; break;
1237 case Py_LE: c = c <= 0; break;
1238 case Py_EQ: assert(0); break; /* unreachable */
1239 case Py_NE: c = c != 0; break;
1240 case Py_GT: c = c > 0; break;
1241 case Py_GE: c = c >= 0; break;
1242 default:
1243 result = Py_NotImplemented;
1244 goto out;
1245 }
1246 result = c ? Py_True : Py_False;
1247 out:
1248 Py_INCREF(result);
1249 return result;
1250}
1251
1252int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001253_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001254{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001255 PyStringObject *a = (PyStringObject*) o1;
1256 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001257 return Py_SIZE(a) == Py_SIZE(b)
1258 && *a->ob_sval == *b->ob_sval
1259 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1260}
1261
1262static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001263string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001264{
1265 register Py_ssize_t len;
1266 register unsigned char *p;
1267 register long x;
1268
1269 if (a->ob_shash != -1)
1270 return a->ob_shash;
1271 len = Py_SIZE(a);
1272 p = (unsigned char *) a->ob_sval;
1273 x = *p << 7;
1274 while (--len >= 0)
1275 x = (1000003*x) ^ *p++;
1276 x ^= Py_SIZE(a);
1277 if (x == -1)
1278 x = -2;
1279 a->ob_shash = x;
1280 return x;
1281}
1282
1283static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001284string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001285{
1286 if (PyIndex_Check(item)) {
1287 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1288 if (i == -1 && PyErr_Occurred())
1289 return NULL;
1290 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001291 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001292 return string_item(self, i);
1293 }
1294 else if (PySlice_Check(item)) {
1295 Py_ssize_t start, stop, step, slicelength, cur, i;
1296 char* source_buf;
1297 char* result_buf;
1298 PyObject* result;
1299
1300 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001301 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001302 &start, &stop, &step, &slicelength) < 0) {
1303 return NULL;
1304 }
1305
1306 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001307 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001308 }
1309 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001310 slicelength == PyString_GET_SIZE(self) &&
1311 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001312 Py_INCREF(self);
1313 return (PyObject *)self;
1314 }
1315 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001316 return PyString_FromStringAndSize(
1317 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001318 slicelength);
1319 }
1320 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001321 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001322 result_buf = (char *)PyMem_Malloc(slicelength);
1323 if (result_buf == NULL)
1324 return PyErr_NoMemory();
1325
1326 for (cur = start, i = 0; i < slicelength;
1327 cur += step, i++) {
1328 result_buf[i] = source_buf[cur];
1329 }
1330
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001331 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001332 slicelength);
1333 PyMem_Free(result_buf);
1334 return result;
1335 }
1336 }
1337 else {
1338 PyErr_Format(PyExc_TypeError,
1339 "string indices must be integers, not %.200s",
1340 Py_TYPE(item)->tp_name);
1341 return NULL;
1342 }
1343}
1344
1345static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001346string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001347{
1348 if ( index != 0 ) {
1349 PyErr_SetString(PyExc_SystemError,
1350 "accessing non-existent string segment");
1351 return -1;
1352 }
1353 *ptr = (void *)self->ob_sval;
1354 return Py_SIZE(self);
1355}
1356
1357static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001358string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001359{
1360 PyErr_SetString(PyExc_TypeError,
1361 "Cannot use string as modifiable buffer");
1362 return -1;
1363}
1364
1365static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001366string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001367{
1368 if ( lenp )
1369 *lenp = Py_SIZE(self);
1370 return 1;
1371}
1372
1373static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001374string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001375{
1376 if ( index != 0 ) {
1377 PyErr_SetString(PyExc_SystemError,
1378 "accessing non-existent string segment");
1379 return -1;
1380 }
1381 *ptr = self->ob_sval;
1382 return Py_SIZE(self);
1383}
1384
1385static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001386string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001387{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001388 return PyBuffer_FillInfo(view, (PyObject*)self,
1389 (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou92a62402008-08-02 21:58:05 +00001390 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001391}
1392
1393static PySequenceMethods string_as_sequence = {
1394 (lenfunc)string_length, /*sq_length*/
1395 (binaryfunc)string_concat, /*sq_concat*/
1396 (ssizeargfunc)string_repeat, /*sq_repeat*/
1397 (ssizeargfunc)string_item, /*sq_item*/
1398 (ssizessizeargfunc)string_slice, /*sq_slice*/
1399 0, /*sq_ass_item*/
1400 0, /*sq_ass_slice*/
1401 (objobjproc)string_contains /*sq_contains*/
1402};
1403
1404static PyMappingMethods string_as_mapping = {
1405 (lenfunc)string_length,
1406 (binaryfunc)string_subscript,
1407 0,
1408};
1409
1410static PyBufferProcs string_as_buffer = {
1411 (readbufferproc)string_buffer_getreadbuf,
1412 (writebufferproc)string_buffer_getwritebuf,
1413 (segcountproc)string_buffer_getsegcount,
1414 (charbufferproc)string_buffer_getcharbuf,
1415 (getbufferproc)string_buffer_getbuffer,
1416 0, /* XXX */
1417};
1418
1419
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001420
/* Selectors for the three strip variants; they double as indices into
   the stripformat table below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Skip the leading three characters ("|O:") of a format string, leaving
   the text that follows them. */
#define STRIPNAME(i) (stripformat[i]+3)
1429
Christian Heimes1a6387e2008-03-26 12:49:49 +00001430PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001431"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001432\n\
Christian Heimes44720832008-05-26 13:01:01 +00001433Return a list of the words in the string S, using sep as the\n\
1434delimiter string. If maxsplit is given, at most maxsplit\n\
1435splits are done. If sep is not specified or is None, any\n\
1436whitespace string is a separator and empty strings are removed\n\
1437from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001438
1439static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001440string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001441{
Antoine Pitrou64672132010-01-13 07:55:48 +00001442 Py_ssize_t len = PyString_GET_SIZE(self), n;
1443 Py_ssize_t maxsplit = -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001444 const char *s = PyString_AS_STRING(self), *sub;
Antoine Pitrou64672132010-01-13 07:55:48 +00001445 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001446
Christian Heimes44720832008-05-26 13:01:01 +00001447 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1448 return NULL;
1449 if (maxsplit < 0)
1450 maxsplit = PY_SSIZE_T_MAX;
1451 if (subobj == Py_None)
Antoine Pitrou64672132010-01-13 07:55:48 +00001452 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001453 if (PyString_Check(subobj)) {
1454 sub = PyString_AS_STRING(subobj);
1455 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001456 }
1457#ifdef Py_USING_UNICODE
1458 else if (PyUnicode_Check(subobj))
1459 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1460#endif
1461 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1462 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001463
Antoine Pitrou64672132010-01-13 07:55:48 +00001464 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001465}
1466
1467PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001468"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001469\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001470Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001471the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001472found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001473
1474static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001475string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001476{
Christian Heimes44720832008-05-26 13:01:01 +00001477 const char *sep;
1478 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001479
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001480 if (PyString_Check(sep_obj)) {
1481 sep = PyString_AS_STRING(sep_obj);
1482 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001483 }
1484#ifdef Py_USING_UNICODE
1485 else if (PyUnicode_Check(sep_obj))
1486 return PyUnicode_Partition((PyObject *) self, sep_obj);
1487#endif
1488 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1489 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001490
Christian Heimes44720832008-05-26 13:01:01 +00001491 return stringlib_partition(
1492 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001493 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001494 sep_obj, sep, sep_len
1495 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001496}
1497
1498PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001499"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001500\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001501Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001502the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001503separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001504
1505static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001506string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001507{
Christian Heimes44720832008-05-26 13:01:01 +00001508 const char *sep;
1509 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001511 if (PyString_Check(sep_obj)) {
1512 sep = PyString_AS_STRING(sep_obj);
1513 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001514 }
1515#ifdef Py_USING_UNICODE
1516 else if (PyUnicode_Check(sep_obj))
Amaury Forgeot d'Arc3571fbf2008-09-01 19:52:00 +00001517 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001518#endif
1519 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1520 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001521
Christian Heimes44720832008-05-26 13:01:01 +00001522 return stringlib_rpartition(
1523 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001524 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001525 sep_obj, sep, sep_len
1526 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001527}
1528
Christian Heimes1a6387e2008-03-26 12:49:49 +00001529PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001530"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001531\n\
Christian Heimes44720832008-05-26 13:01:01 +00001532Return a list of the words in the string S, using sep as the\n\
1533delimiter string, starting at the end of the string and working\n\
1534to the front. If maxsplit is given, at most maxsplit splits are\n\
1535done. If sep is not specified or is None, any whitespace string\n\
1536is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001537
1538static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001539string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001540{
Antoine Pitrou64672132010-01-13 07:55:48 +00001541 Py_ssize_t len = PyString_GET_SIZE(self), n;
1542 Py_ssize_t maxsplit = -1;
Antoine Pitrou5b7139a2010-01-02 21:12:58 +00001543 const char *s = PyString_AS_STRING(self), *sub;
Antoine Pitrou64672132010-01-13 07:55:48 +00001544 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001545
Christian Heimes44720832008-05-26 13:01:01 +00001546 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1547 return NULL;
1548 if (maxsplit < 0)
1549 maxsplit = PY_SSIZE_T_MAX;
1550 if (subobj == Py_None)
Antoine Pitrou64672132010-01-13 07:55:48 +00001551 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001552 if (PyString_Check(subobj)) {
1553 sub = PyString_AS_STRING(subobj);
1554 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001555 }
1556#ifdef Py_USING_UNICODE
1557 else if (PyUnicode_Check(subobj))
1558 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1559#endif
1560 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1561 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001562
Antoine Pitrou64672132010-01-13 07:55:48 +00001563 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001564}
1565
1566
1567PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001568"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001569\n\
1570Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001571iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001572
1573static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001574string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001575{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001576 char *sep = PyString_AS_STRING(self);
1577 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001578 PyObject *res = NULL;
1579 char *p;
1580 Py_ssize_t seqlen = 0;
1581 size_t sz = 0;
1582 Py_ssize_t i;
1583 PyObject *seq, *item;
1584
1585 seq = PySequence_Fast(orig, "");
1586 if (seq == NULL) {
1587 return NULL;
1588 }
1589
1590 seqlen = PySequence_Size(seq);
1591 if (seqlen == 0) {
1592 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001593 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001594 }
1595 if (seqlen == 1) {
1596 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001597 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001598 Py_INCREF(item);
1599 Py_DECREF(seq);
1600 return item;
1601 }
1602 }
1603
1604 /* There are at least two things to join, or else we have a subclass
1605 * of the builtin types in the sequence.
1606 * Do a pre-pass to figure out the total amount of space we'll
1607 * need (sz), see whether any argument is absurd, and defer to
1608 * the Unicode join if appropriate.
1609 */
1610 for (i = 0; i < seqlen; i++) {
1611 const size_t old_sz = sz;
1612 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001613 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001614#ifdef Py_USING_UNICODE
1615 if (PyUnicode_Check(item)) {
1616 /* Defer to Unicode join.
1617 * CAUTION: There's no gurantee that the
1618 * original sequence can be iterated over
1619 * again, so we must pass seq here.
1620 */
1621 PyObject *result;
1622 result = PyUnicode_Join((PyObject *)self, seq);
1623 Py_DECREF(seq);
1624 return result;
1625 }
1626#endif
1627 PyErr_Format(PyExc_TypeError,
1628 "sequence item %zd: expected string,"
1629 " %.80s found",
1630 i, Py_TYPE(item)->tp_name);
1631 Py_DECREF(seq);
1632 return NULL;
1633 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001634 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001635 if (i != 0)
1636 sz += seplen;
1637 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1638 PyErr_SetString(PyExc_OverflowError,
1639 "join() result is too long for a Python string");
1640 Py_DECREF(seq);
1641 return NULL;
1642 }
1643 }
1644
1645 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001646 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001647 if (res == NULL) {
1648 Py_DECREF(seq);
1649 return NULL;
1650 }
1651
1652 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001653 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001654 for (i = 0; i < seqlen; ++i) {
1655 size_t n;
1656 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001657 n = PyString_GET_SIZE(item);
1658 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001659 p += n;
1660 if (i < seqlen - 1) {
1661 Py_MEMCPY(p, sep, seplen);
1662 p += seplen;
1663 }
1664 }
1665
1666 Py_DECREF(seq);
1667 return res;
1668}
1669
1670PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001671_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001672{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001673 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001674 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001675 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001676}
1677
/* Helper macro to fix up start/end slice values in place.
 *
 * `start` and `end` must be modifiable lvalues; `len` is the length of the
 * sequence being sliced.  An `end` greater than `len` is clamped to `len`;
 * a negative `start` or `end` has `len` added once and is then clamped
 * at 0.  `start` is never clamped to `len`, so it may still be greater
 * than `end` afterwards.  `len` is evaluated more than once and must
 * therefore be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else; invoke
 * it with a trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001692
1693Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001694string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001695{
1696 PyObject *subobj;
1697 const char *sub;
1698 Py_ssize_t sub_len;
1699 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1700 PyObject *obj_start=Py_None, *obj_end=Py_None;
1701
1702 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1703 &obj_start, &obj_end))
1704 return -2;
1705 /* To support None in "start" and "end" arguments, meaning
1706 the same as if they were not passed.
1707 */
1708 if (obj_start != Py_None)
1709 if (!_PyEval_SliceIndex(obj_start, &start))
1710 return -2;
1711 if (obj_end != Py_None)
1712 if (!_PyEval_SliceIndex(obj_end, &end))
1713 return -2;
1714
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001715 if (PyString_Check(subobj)) {
1716 sub = PyString_AS_STRING(subobj);
1717 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001718 }
1719#ifdef Py_USING_UNICODE
1720 else if (PyUnicode_Check(subobj))
1721 return PyUnicode_Find(
1722 (PyObject *)self, subobj, start, end, dir);
1723#endif
1724 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1725 /* XXX - the "expected a character buffer object" is pretty
1726 confusing for a non-expert. remap to something else ? */
1727 return -2;
1728
1729 if (dir > 0)
1730 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001731 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001732 sub, sub_len, start, end);
1733 else
1734 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001735 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001736 sub, sub_len, start, end);
1737}
1738
1739
1740PyDoc_STRVAR(find__doc__,
1741"S.find(sub [,start [,end]]) -> int\n\
1742\n\
1743Return the lowest index in S where substring sub is found,\n\
1744such that sub is contained within s[start:end]. Optional\n\
1745arguments start and end are interpreted as in slice notation.\n\
1746\n\
1747Return -1 on failure.");
1748
1749static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001750string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001751{
1752 Py_ssize_t result = string_find_internal(self, args, +1);
1753 if (result == -2)
1754 return NULL;
1755 return PyInt_FromSsize_t(result);
1756}
1757
1758
1759PyDoc_STRVAR(index__doc__,
1760"S.index(sub [,start [,end]]) -> int\n\
1761\n\
1762Like S.find() but raise ValueError when the substring is not found.");
1763
1764static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001765string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001766{
1767 Py_ssize_t result = string_find_internal(self, args, +1);
1768 if (result == -2)
1769 return NULL;
1770 if (result == -1) {
1771 PyErr_SetString(PyExc_ValueError,
1772 "substring not found");
1773 return NULL;
1774 }
1775 return PyInt_FromSsize_t(result);
1776}
1777
1778
1779PyDoc_STRVAR(rfind__doc__,
1780"S.rfind(sub [,start [,end]]) -> int\n\
1781\n\
1782Return the highest index in S where substring sub is found,\n\
1783such that sub is contained within s[start:end]. Optional\n\
1784arguments start and end are interpreted as in slice notation.\n\
1785\n\
1786Return -1 on failure.");
1787
1788static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001789string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001790{
1791 Py_ssize_t result = string_find_internal(self, args, -1);
1792 if (result == -2)
1793 return NULL;
1794 return PyInt_FromSsize_t(result);
1795}
1796
1797
1798PyDoc_STRVAR(rindex__doc__,
1799"S.rindex(sub [,start [,end]]) -> int\n\
1800\n\
1801Like S.rfind() but raise ValueError when the substring is not found.");
1802
1803static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001804string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001805{
1806 Py_ssize_t result = string_find_internal(self, args, -1);
1807 if (result == -2)
1808 return NULL;
1809 if (result == -1) {
1810 PyErr_SetString(PyExc_ValueError,
1811 "substring not found");
1812 return NULL;
1813 }
1814 return PyInt_FromSsize_t(result);
1815}
1816
1817
1818Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001819do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001820{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001821 char *s = PyString_AS_STRING(self);
1822 Py_ssize_t len = PyString_GET_SIZE(self);
1823 char *sep = PyString_AS_STRING(sepobj);
1824 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00001825 Py_ssize_t i, j;
1826
1827 i = 0;
1828 if (striptype != RIGHTSTRIP) {
1829 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1830 i++;
1831 }
1832 }
1833
1834 j = len;
1835 if (striptype != LEFTSTRIP) {
1836 do {
1837 j--;
1838 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1839 j++;
1840 }
1841
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001842 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001843 Py_INCREF(self);
1844 return (PyObject*)self;
1845 }
1846 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001847 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001848}
1849
1850
1851Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001852do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001853{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001854 char *s = PyString_AS_STRING(self);
1855 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001856
1857 i = 0;
1858 if (striptype != RIGHTSTRIP) {
1859 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1860 i++;
1861 }
1862 }
1863
1864 j = len;
1865 if (striptype != LEFTSTRIP) {
1866 do {
1867 j--;
1868 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1869 j++;
1870 }
1871
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001872 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001873 Py_INCREF(self);
1874 return (PyObject*)self;
1875 }
1876 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001877 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001878}
1879
1880
1881Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001882do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001883{
1884 PyObject *sep = NULL;
1885
1886 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1887 return NULL;
1888
1889 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001890 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00001891 return do_xstrip(self, striptype, sep);
1892#ifdef Py_USING_UNICODE
1893 else if (PyUnicode_Check(sep)) {
1894 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1895 PyObject *res;
1896 if (uniself==NULL)
1897 return NULL;
1898 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1899 striptype, sep);
1900 Py_DECREF(uniself);
1901 return res;
1902 }
1903#endif
1904 PyErr_Format(PyExc_TypeError,
1905#ifdef Py_USING_UNICODE
1906 "%s arg must be None, str or unicode",
1907#else
1908 "%s arg must be None or str",
1909#endif
1910 STRIPNAME(striptype));
1911 return NULL;
1912 }
1913
1914 return do_strip(self, striptype);
1915}
1916
1917
1918PyDoc_STRVAR(strip__doc__,
1919"S.strip([chars]) -> string or unicode\n\
1920\n\
1921Return a copy of the string S with leading and trailing\n\
1922whitespace removed.\n\
1923If chars is given and not None, remove characters in chars instead.\n\
1924If chars is unicode, S will be converted to unicode before stripping");
1925
1926static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001927string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001928{
1929 if (PyTuple_GET_SIZE(args) == 0)
1930 return do_strip(self, BOTHSTRIP); /* Common case */
1931 else
1932 return do_argstrip(self, BOTHSTRIP, args);
1933}
1934
1935
1936PyDoc_STRVAR(lstrip__doc__,
1937"S.lstrip([chars]) -> string or unicode\n\
1938\n\
1939Return a copy of the string S with leading whitespace removed.\n\
1940If chars is given and not None, remove characters in chars instead.\n\
1941If chars is unicode, S will be converted to unicode before stripping");
1942
1943static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001944string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001945{
1946 if (PyTuple_GET_SIZE(args) == 0)
1947 return do_strip(self, LEFTSTRIP); /* Common case */
1948 else
1949 return do_argstrip(self, LEFTSTRIP, args);
1950}
1951
1952
1953PyDoc_STRVAR(rstrip__doc__,
1954"S.rstrip([chars]) -> string or unicode\n\
1955\n\
1956Return a copy of the string S with trailing whitespace removed.\n\
1957If chars is given and not None, remove characters in chars instead.\n\
1958If chars is unicode, S will be converted to unicode before stripping");
1959
1960static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001961string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001962{
1963 if (PyTuple_GET_SIZE(args) == 0)
1964 return do_strip(self, RIGHTSTRIP); /* Common case */
1965 else
1966 return do_argstrip(self, RIGHTSTRIP, args);
1967}
1968
1969
1970PyDoc_STRVAR(lower__doc__,
1971"S.lower() -> string\n\
1972\n\
1973Return a copy of the string S converted to lowercase.");
1974
1975/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1976#ifndef _tolower
1977#define _tolower tolower
1978#endif
1979
1980static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001981string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001982{
1983 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001984 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001985 PyObject *newobj;
1986
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001987 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00001988 if (!newobj)
1989 return NULL;
1990
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001991 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001992
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001993 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001994
1995 for (i = 0; i < n; i++) {
1996 int c = Py_CHARMASK(s[i]);
1997 if (isupper(c))
1998 s[i] = _tolower(c);
1999 }
2000
2001 return newobj;
2002}
2003
2004PyDoc_STRVAR(upper__doc__,
2005"S.upper() -> string\n\
2006\n\
2007Return a copy of the string S converted to uppercase.");
2008
2009#ifndef _toupper
2010#define _toupper toupper
2011#endif
2012
2013static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002014string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002015{
2016 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002017 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002018 PyObject *newobj;
2019
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002020 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002021 if (!newobj)
2022 return NULL;
2023
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002024 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002025
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002026 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002027
2028 for (i = 0; i < n; i++) {
2029 int c = Py_CHARMASK(s[i]);
2030 if (islower(c))
2031 s[i] = _toupper(c);
2032 }
2033
2034 return newobj;
2035}
2036
2037PyDoc_STRVAR(title__doc__,
2038"S.title() -> string\n\
2039\n\
2040Return a titlecased version of S, i.e. words start with uppercase\n\
2041characters, all remaining cased characters have lowercase.");
2042
2043static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002044string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002045{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002046 char *s = PyString_AS_STRING(self), *s_new;
2047 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002048 int previous_is_cased = 0;
2049 PyObject *newobj;
2050
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002051 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002052 if (newobj == NULL)
2053 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002054 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002055 for (i = 0; i < n; i++) {
2056 int c = Py_CHARMASK(*s++);
2057 if (islower(c)) {
2058 if (!previous_is_cased)
2059 c = toupper(c);
2060 previous_is_cased = 1;
2061 } else if (isupper(c)) {
2062 if (previous_is_cased)
2063 c = tolower(c);
2064 previous_is_cased = 1;
2065 } else
2066 previous_is_cased = 0;
2067 *s_new++ = c;
2068 }
2069 return newobj;
2070}
2071
2072PyDoc_STRVAR(capitalize__doc__,
2073"S.capitalize() -> string\n\
2074\n\
2075Return a copy of the string S with only its first character\n\
2076capitalized.");
2077
2078static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002079string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002080{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002081 char *s = PyString_AS_STRING(self), *s_new;
2082 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002083 PyObject *newobj;
2084
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002085 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002086 if (newobj == NULL)
2087 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002088 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002089 if (0 < n) {
2090 int c = Py_CHARMASK(*s++);
2091 if (islower(c))
2092 *s_new = toupper(c);
2093 else
2094 *s_new = c;
2095 s_new++;
2096 }
2097 for (i = 1; i < n; i++) {
2098 int c = Py_CHARMASK(*s++);
2099 if (isupper(c))
2100 *s_new = tolower(c);
2101 else
2102 *s_new = c;
2103 s_new++;
2104 }
2105 return newobj;
2106}
2107
2108
2109PyDoc_STRVAR(count__doc__,
2110"S.count(sub[, start[, end]]) -> int\n\
2111\n\
2112Return the number of non-overlapping occurrences of substring sub in\n\
2113string S[start:end]. Optional arguments start and end are interpreted\n\
2114as in slice notation.");
2115
2116static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002117string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002118{
2119 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002120 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002121 Py_ssize_t sub_len;
2122 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2123
2124 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2125 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2126 return NULL;
2127
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002128 if (PyString_Check(sub_obj)) {
2129 sub = PyString_AS_STRING(sub_obj);
2130 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002131 }
2132#ifdef Py_USING_UNICODE
2133 else if (PyUnicode_Check(sub_obj)) {
2134 Py_ssize_t count;
2135 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2136 if (count == -1)
2137 return NULL;
2138 else
2139 return PyInt_FromSsize_t(count);
2140 }
2141#endif
2142 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2143 return NULL;
2144
Antoine Pitrou64672132010-01-13 07:55:48 +00002145 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002146
2147 return PyInt_FromSsize_t(
Antoine Pitrou64672132010-01-13 07:55:48 +00002148 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
Christian Heimes44720832008-05-26 13:01:01 +00002149 );
2150}
2151
2152PyDoc_STRVAR(swapcase__doc__,
2153"S.swapcase() -> string\n\
2154\n\
2155Return a copy of the string S with uppercase characters\n\
2156converted to lowercase and vice versa.");
2157
2158static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002159string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002160{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002161 char *s = PyString_AS_STRING(self), *s_new;
2162 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002163 PyObject *newobj;
2164
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002165 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002166 if (newobj == NULL)
2167 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002168 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002169 for (i = 0; i < n; i++) {
2170 int c = Py_CHARMASK(*s++);
2171 if (islower(c)) {
2172 *s_new = toupper(c);
2173 }
2174 else if (isupper(c)) {
2175 *s_new = tolower(c);
2176 }
2177 else
2178 *s_new = c;
2179 s_new++;
2180 }
2181 return newobj;
2182}
2183
2184
2185PyDoc_STRVAR(translate__doc__,
2186"S.translate(table [,deletechars]) -> string\n\
2187\n\
2188Return a copy of the string S, where all characters occurring\n\
2189in the optional argument deletechars are removed, and the\n\
2190remaining characters have been mapped through the given\n\
2191translation table, which must be a string of length 256.");
2192
2193static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002194string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002195{
2196 register char *input, *output;
2197 const char *table;
2198 register Py_ssize_t i, c, changed = 0;
2199 PyObject *input_obj = (PyObject*)self;
2200 const char *output_start, *del_table=NULL;
2201 Py_ssize_t inlen, tablen, dellen = 0;
2202 PyObject *result;
2203 int trans_table[256];
2204 PyObject *tableobj, *delobj = NULL;
2205
2206 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2207 &tableobj, &delobj))
2208 return NULL;
2209
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002210 if (PyString_Check(tableobj)) {
2211 table = PyString_AS_STRING(tableobj);
2212 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002213 }
2214 else if (tableobj == Py_None) {
2215 table = NULL;
2216 tablen = 256;
2217 }
2218#ifdef Py_USING_UNICODE
2219 else if (PyUnicode_Check(tableobj)) {
2220 /* Unicode .translate() does not support the deletechars
2221 parameter; instead a mapping to None will cause characters
2222 to be deleted. */
2223 if (delobj != NULL) {
2224 PyErr_SetString(PyExc_TypeError,
2225 "deletions are implemented differently for unicode");
2226 return NULL;
2227 }
2228 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2229 }
2230#endif
2231 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2232 return NULL;
2233
2234 if (tablen != 256) {
2235 PyErr_SetString(PyExc_ValueError,
2236 "translation table must be 256 characters long");
2237 return NULL;
2238 }
2239
2240 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002241 if (PyString_Check(delobj)) {
2242 del_table = PyString_AS_STRING(delobj);
2243 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002244 }
2245#ifdef Py_USING_UNICODE
2246 else if (PyUnicode_Check(delobj)) {
2247 PyErr_SetString(PyExc_TypeError,
2248 "deletions are implemented differently for unicode");
2249 return NULL;
2250 }
2251#endif
2252 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2253 return NULL;
2254 }
2255 else {
2256 del_table = NULL;
2257 dellen = 0;
2258 }
2259
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002260 inlen = PyString_GET_SIZE(input_obj);
2261 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002262 if (result == NULL)
2263 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002264 output_start = output = PyString_AsString(result);
2265 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002266
2267 if (dellen == 0 && table != NULL) {
2268 /* If no deletions are required, use faster code */
2269 for (i = inlen; --i >= 0; ) {
2270 c = Py_CHARMASK(*input++);
2271 if (Py_CHARMASK((*output++ = table[c])) != c)
2272 changed = 1;
2273 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002274 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002275 return result;
2276 Py_DECREF(result);
2277 Py_INCREF(input_obj);
2278 return input_obj;
2279 }
2280
2281 if (table == NULL) {
2282 for (i = 0; i < 256; i++)
2283 trans_table[i] = Py_CHARMASK(i);
2284 } else {
2285 for (i = 0; i < 256; i++)
2286 trans_table[i] = Py_CHARMASK(table[i]);
2287 }
2288
2289 for (i = 0; i < dellen; i++)
2290 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2291
2292 for (i = inlen; --i >= 0; ) {
2293 c = Py_CHARMASK(*input++);
2294 if (trans_table[c] != -1)
2295 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2296 continue;
2297 changed = 1;
2298 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002299 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002300 Py_DECREF(result);
2301 Py_INCREF(input_obj);
2302 return input_obj;
2303 }
2304 /* Fix the size of the resulting string */
Benjamin Peterson6caf7ff2010-04-02 23:59:41 +00002305 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2306 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002307 return result;
2308}
2309
2310
Christian Heimes44720832008-05-26 13:01:01 +00002311/* find and count characters and substrings */
2312
/* Locate the first byte equal to c within the target_len bytes that start
   at target.  Expands to a memchr() call whose result is cast to char *,
   so it yields NULL when no such byte exists. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2315
2316/* String ops must return a string. */
2317/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002318Py_LOCAL(PyStringObject *)
2319return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002320{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002321 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002322 Py_INCREF(self);
2323 return self;
2324 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002325 return (PyStringObject *)PyString_FromStringAndSize(
2326 PyString_AS_STRING(self),
2327 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002328}
2329
2330Py_LOCAL_INLINE(Py_ssize_t)
2331countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2332{
2333 Py_ssize_t count=0;
2334 const char *start=target;
2335 const char *end=target+target_len;
2336
2337 while ( (start=findchar(start, end-start, c)) != NULL ) {
2338 count++;
2339 if (count >= maxcount)
2340 break;
2341 start += 1;
2342 }
2343 return count;
2344}
2345
Christian Heimes44720832008-05-26 13:01:01 +00002346
2347/* Algorithms for different cases of string replacement */
2348
2349/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002350Py_LOCAL(PyStringObject *)
2351replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002352 const char *to_s, Py_ssize_t to_len,
2353 Py_ssize_t maxcount)
2354{
2355 char *self_s, *result_s;
2356 Py_ssize_t self_len, result_len;
2357 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002358 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002359
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002360 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002361
2362 /* 1 at the end plus 1 after every character */
2363 count = self_len+1;
2364 if (maxcount < count)
2365 count = maxcount;
2366
2367 /* Check for overflow */
2368 /* result_len = count * to_len + self_len; */
2369 product = count * to_len;
2370 if (product / to_len != count) {
2371 PyErr_SetString(PyExc_OverflowError,
2372 "replace string is too long");
2373 return NULL;
2374 }
2375 result_len = product + self_len;
2376 if (result_len < 0) {
2377 PyErr_SetString(PyExc_OverflowError,
2378 "replace string is too long");
2379 return NULL;
2380 }
2381
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002382 if (! (result = (PyStringObject *)
2383 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002384 return NULL;
2385
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002386 self_s = PyString_AS_STRING(self);
2387 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002388
2389 /* TODO: special case single character, which doesn't need memcpy */
2390
2391 /* Lay the first one down (guaranteed this will occur) */
2392 Py_MEMCPY(result_s, to_s, to_len);
2393 result_s += to_len;
2394 count -= 1;
2395
2396 for (i=0; i<count; i++) {
2397 *result_s++ = *self_s++;
2398 Py_MEMCPY(result_s, to_s, to_len);
2399 result_s += to_len;
2400 }
2401
2402 /* Copy the rest of the original string */
2403 Py_MEMCPY(result_s, self_s, self_len-i);
2404
2405 return result;
2406}
2407
2408/* Special case for deleting a single character */
2409/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002410Py_LOCAL(PyStringObject *)
2411replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002412 char from_c, Py_ssize_t maxcount)
2413{
2414 char *self_s, *result_s;
2415 char *start, *next, *end;
2416 Py_ssize_t self_len, result_len;
2417 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002418 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002419
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002420 self_len = PyString_GET_SIZE(self);
2421 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002422
2423 count = countchar(self_s, self_len, from_c, maxcount);
2424 if (count == 0) {
2425 return return_self(self);
2426 }
2427
2428 result_len = self_len - count; /* from_len == 1 */
2429 assert(result_len>=0);
2430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002431 if ( (result = (PyStringObject *)
2432 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002433 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002434 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002435
2436 start = self_s;
2437 end = self_s + self_len;
2438 while (count-- > 0) {
2439 next = findchar(start, end-start, from_c);
2440 if (next == NULL)
2441 break;
2442 Py_MEMCPY(result_s, start, next-start);
2443 result_s += (next-start);
2444 start = next+1;
2445 }
2446 Py_MEMCPY(result_s, start, end-start);
2447
2448 return result;
2449}
2450
2451/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2452
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002453Py_LOCAL(PyStringObject *)
2454replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002455 const char *from_s, Py_ssize_t from_len,
2456 Py_ssize_t maxcount) {
2457 char *self_s, *result_s;
2458 char *start, *next, *end;
2459 Py_ssize_t self_len, result_len;
2460 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002461 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002462
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002463 self_len = PyString_GET_SIZE(self);
2464 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002465
Antoine Pitrou64672132010-01-13 07:55:48 +00002466 count = stringlib_count(self_s, self_len,
2467 from_s, from_len,
2468 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002469
2470 if (count == 0) {
2471 /* no matches */
2472 return return_self(self);
2473 }
2474
2475 result_len = self_len - (count * from_len);
2476 assert (result_len>=0);
2477
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002478 if ( (result = (PyStringObject *)
2479 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002480 return NULL;
2481
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002482 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002483
2484 start = self_s;
2485 end = self_s + self_len;
2486 while (count-- > 0) {
Antoine Pitrou64672132010-01-13 07:55:48 +00002487 offset = stringlib_find(start, end-start,
2488 from_s, from_len,
2489 0);
Christian Heimes44720832008-05-26 13:01:01 +00002490 if (offset == -1)
2491 break;
2492 next = start + offset;
2493
2494 Py_MEMCPY(result_s, start, next-start);
2495
2496 result_s += (next-start);
2497 start = next+from_len;
2498 }
2499 Py_MEMCPY(result_s, start, end-start);
2500 return result;
2501}
2502
2503/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002504Py_LOCAL(PyStringObject *)
2505replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002506 char from_c, char to_c,
2507 Py_ssize_t maxcount)
2508{
2509 char *self_s, *result_s, *start, *end, *next;
2510 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002511 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002512
2513 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002514 self_s = PyString_AS_STRING(self);
2515 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002516
2517 next = findchar(self_s, self_len, from_c);
2518
2519 if (next == NULL) {
2520 /* No matches; return the original string */
2521 return return_self(self);
2522 }
2523
2524 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002525 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002526 if (result == NULL)
2527 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002528 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002529 Py_MEMCPY(result_s, self_s, self_len);
2530
2531 /* change everything in-place, starting with this one */
2532 start = result_s + (next-self_s);
2533 *start = to_c;
2534 start++;
2535 end = result_s + self_len;
2536
2537 while (--maxcount > 0) {
2538 next = findchar(start, end-start, from_c);
2539 if (next == NULL)
2540 break;
2541 *next = to_c;
2542 start = next+1;
2543 }
2544
2545 return result;
2546}
2547
2548/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002549Py_LOCAL(PyStringObject *)
2550replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002551 const char *from_s, Py_ssize_t from_len,
2552 const char *to_s, Py_ssize_t to_len,
2553 Py_ssize_t maxcount)
2554{
2555 char *result_s, *start, *end;
2556 char *self_s;
2557 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002558 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002559
2560 /* The result string will be the same size */
2561
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002562 self_s = PyString_AS_STRING(self);
2563 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002564
Antoine Pitrou64672132010-01-13 07:55:48 +00002565 offset = stringlib_find(self_s, self_len,
2566 from_s, from_len,
2567 0);
Christian Heimes44720832008-05-26 13:01:01 +00002568 if (offset == -1) {
2569 /* No matches; return the original string */
2570 return return_self(self);
2571 }
2572
2573 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002574 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002575 if (result == NULL)
2576 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002577 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002578 Py_MEMCPY(result_s, self_s, self_len);
2579
2580 /* change everything in-place, starting with this one */
2581 start = result_s + offset;
2582 Py_MEMCPY(start, to_s, from_len);
2583 start += from_len;
2584 end = result_s + self_len;
2585
2586 while ( --maxcount > 0) {
Antoine Pitrou64672132010-01-13 07:55:48 +00002587 offset = stringlib_find(start, end-start,
2588 from_s, from_len,
2589 0);
Christian Heimes44720832008-05-26 13:01:01 +00002590 if (offset==-1)
2591 break;
2592 Py_MEMCPY(start+offset, to_s, from_len);
2593 start += offset+from_len;
2594 }
2595
2596 return result;
2597}
2598
2599/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002600Py_LOCAL(PyStringObject *)
2601replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002602 char from_c,
2603 const char *to_s, Py_ssize_t to_len,
2604 Py_ssize_t maxcount)
2605{
2606 char *self_s, *result_s;
2607 char *start, *next, *end;
2608 Py_ssize_t self_len, result_len;
2609 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002610 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002611
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002612 self_s = PyString_AS_STRING(self);
2613 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002614
2615 count = countchar(self_s, self_len, from_c, maxcount);
2616 if (count == 0) {
2617 /* no matches, return unchanged */
2618 return return_self(self);
2619 }
2620
2621 /* use the difference between current and new, hence the "-1" */
2622 /* result_len = self_len + count * (to_len-1) */
2623 product = count * (to_len-1);
2624 if (product / (to_len-1) != count) {
2625 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2626 return NULL;
2627 }
2628 result_len = self_len + product;
2629 if (result_len < 0) {
2630 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2631 return NULL;
2632 }
2633
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002634 if ( (result = (PyStringObject *)
2635 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002636 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002637 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002638
2639 start = self_s;
2640 end = self_s + self_len;
2641 while (count-- > 0) {
2642 next = findchar(start, end-start, from_c);
2643 if (next == NULL)
2644 break;
2645
2646 if (next == start) {
2647 /* replace with the 'to' */
2648 Py_MEMCPY(result_s, to_s, to_len);
2649 result_s += to_len;
2650 start += 1;
2651 } else {
2652 /* copy the unchanged old then the 'to' */
2653 Py_MEMCPY(result_s, start, next-start);
2654 result_s += (next-start);
2655 Py_MEMCPY(result_s, to_s, to_len);
2656 result_s += to_len;
2657 start = next+1;
2658 }
2659 }
2660 /* Copy the remainder of the remaining string */
2661 Py_MEMCPY(result_s, start, end-start);
2662
2663 return result;
2664}
2665
2666/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002667Py_LOCAL(PyStringObject *)
2668replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002669 const char *from_s, Py_ssize_t from_len,
2670 const char *to_s, Py_ssize_t to_len,
2671 Py_ssize_t maxcount) {
2672 char *self_s, *result_s;
2673 char *start, *next, *end;
2674 Py_ssize_t self_len, result_len;
2675 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002676 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002677
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002678 self_s = PyString_AS_STRING(self);
2679 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002680
Antoine Pitrou64672132010-01-13 07:55:48 +00002681 count = stringlib_count(self_s, self_len,
2682 from_s, from_len,
2683 maxcount);
2684
Christian Heimes44720832008-05-26 13:01:01 +00002685 if (count == 0) {
2686 /* no matches, return unchanged */
2687 return return_self(self);
2688 }
2689
2690 /* Check for overflow */
2691 /* result_len = self_len + count * (to_len-from_len) */
2692 product = count * (to_len-from_len);
2693 if (product / (to_len-from_len) != count) {
2694 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2695 return NULL;
2696 }
2697 result_len = self_len + product;
2698 if (result_len < 0) {
2699 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700 return NULL;
2701 }
2702
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002703 if ( (result = (PyStringObject *)
2704 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002705 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002706 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002707
2708 start = self_s;
2709 end = self_s + self_len;
2710 while (count-- > 0) {
Antoine Pitrou64672132010-01-13 07:55:48 +00002711 offset = stringlib_find(start, end-start,
2712 from_s, from_len,
2713 0);
Christian Heimes44720832008-05-26 13:01:01 +00002714 if (offset == -1)
2715 break;
2716 next = start+offset;
2717 if (next == start) {
2718 /* replace with the 'to' */
2719 Py_MEMCPY(result_s, to_s, to_len);
2720 result_s += to_len;
2721 start += from_len;
2722 } else {
2723 /* copy the unchanged old then the 'to' */
2724 Py_MEMCPY(result_s, start, next-start);
2725 result_s += (next-start);
2726 Py_MEMCPY(result_s, to_s, to_len);
2727 result_s += to_len;
2728 start = next+from_len;
2729 }
2730 }
2731 /* Copy the remainder of the remaining string */
2732 Py_MEMCPY(result_s, start, end-start);
2733
2734 return result;
2735}
2736
2737
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002738Py_LOCAL(PyStringObject *)
2739replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002740 const char *from_s, Py_ssize_t from_len,
2741 const char *to_s, Py_ssize_t to_len,
2742 Py_ssize_t maxcount)
2743{
2744 if (maxcount < 0) {
2745 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002746 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00002747 /* nothing to do; return the original string */
2748 return return_self(self);
2749 }
2750
2751 if (maxcount == 0 ||
2752 (from_len == 0 && to_len == 0)) {
2753 /* nothing to do; return the original string */
2754 return return_self(self);
2755 }
2756
2757 /* Handle zero-length special cases */
2758
2759 if (from_len == 0) {
2760 /* insert the 'to' string everywhere. */
2761 /* >>> "Python".replace("", ".") */
2762 /* '.P.y.t.h.o.n.' */
2763 return replace_interleave(self, to_s, to_len, maxcount);
2764 }
2765
2766 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767 /* point for an empty self string to generate a non-empty string */
2768 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002769 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00002770 return return_self(self);
2771 }
2772
2773 if (to_len == 0) {
2774 /* delete all occurances of 'from' string */
2775 if (from_len == 1) {
2776 return replace_delete_single_character(
2777 self, from_s[0], maxcount);
2778 } else {
2779 return replace_delete_substring(self, from_s, from_len, maxcount);
2780 }
2781 }
2782
2783 /* Handle special case where both strings have the same length */
2784
2785 if (from_len == to_len) {
2786 if (from_len == 1) {
2787 return replace_single_character_in_place(
2788 self,
2789 from_s[0],
2790 to_s[0],
2791 maxcount);
2792 } else {
2793 return replace_substring_in_place(
2794 self, from_s, from_len, to_s, to_len, maxcount);
2795 }
2796 }
2797
2798 /* Otherwise use the more generic algorithms */
2799 if (from_len == 1) {
2800 return replace_single_character(self, from_s[0],
2801 to_s, to_len, maxcount);
2802 } else {
2803 /* len('from')>=2, len('to')>=1 */
2804 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2805 }
2806}
2807
2808PyDoc_STRVAR(replace__doc__,
2809"S.replace (old, new[, count]) -> string\n\
2810\n\
2811Return a copy of string S with all occurrences of substring\n\
2812old replaced by new. If the optional argument count is\n\
2813given, only the first count occurrences are replaced.");
2814
2815static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002816string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002817{
2818 Py_ssize_t count = -1;
2819 PyObject *from, *to;
2820 const char *from_s, *to_s;
2821 Py_ssize_t from_len, to_len;
2822
2823 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2824 return NULL;
2825
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002826 if (PyString_Check(from)) {
2827 from_s = PyString_AS_STRING(from);
2828 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00002829 }
2830#ifdef Py_USING_UNICODE
2831 if (PyUnicode_Check(from))
2832 return PyUnicode_Replace((PyObject *)self,
2833 from, to, count);
2834#endif
2835 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2836 return NULL;
2837
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002838 if (PyString_Check(to)) {
2839 to_s = PyString_AS_STRING(to);
2840 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00002841 }
2842#ifdef Py_USING_UNICODE
2843 else if (PyUnicode_Check(to))
2844 return PyUnicode_Replace((PyObject *)self,
2845 from, to, count);
2846#endif
2847 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2848 return NULL;
2849
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002850 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00002851 from_s, from_len,
2852 to_s, to_len, count);
2853}
2854
2855/** End DALKE **/
2856
2857/* Matches the end (direction >= 0) or start (direction < 0) of self
2858 * against substr, using the start and end arguments. Returns
2859 * -1 on error, 0 if not found and 1 if found.
2860 */
2861Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002862_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00002863 Py_ssize_t end, int direction)
2864{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002865 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002866 Py_ssize_t slen;
2867 const char* sub;
2868 const char* str;
2869
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002870 if (PyString_Check(substr)) {
2871 sub = PyString_AS_STRING(substr);
2872 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00002873 }
2874#ifdef Py_USING_UNICODE
2875 else if (PyUnicode_Check(substr))
2876 return PyUnicode_Tailmatch((PyObject *)self,
2877 substr, start, end, direction);
2878#endif
2879 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2880 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002881 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002882
Antoine Pitrou64672132010-01-13 07:55:48 +00002883 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002884
2885 if (direction < 0) {
2886 /* startswith */
2887 if (start+slen > len)
2888 return 0;
2889 } else {
2890 /* endswith */
2891 if (end-start < slen || start > len)
2892 return 0;
2893
2894 if (end-slen > start)
2895 start = end - slen;
2896 }
2897 if (end-start >= slen)
2898 return ! memcmp(str+start, sub, slen);
2899 return 0;
2900}
2901
2902
2903PyDoc_STRVAR(startswith__doc__,
2904"S.startswith(prefix[, start[, end]]) -> bool\n\
2905\n\
2906Return True if S starts with the specified prefix, False otherwise.\n\
2907With optional start, test S beginning at that position.\n\
2908With optional end, stop comparing S at that position.\n\
2909prefix can also be a tuple of strings to try.");
2910
2911static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002912string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002913{
2914 Py_ssize_t start = 0;
2915 Py_ssize_t end = PY_SSIZE_T_MAX;
2916 PyObject *subobj;
2917 int result;
2918
2919 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2920 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2921 return NULL;
2922 if (PyTuple_Check(subobj)) {
2923 Py_ssize_t i;
2924 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2925 result = _string_tailmatch(self,
2926 PyTuple_GET_ITEM(subobj, i),
2927 start, end, -1);
2928 if (result == -1)
2929 return NULL;
2930 else if (result) {
2931 Py_RETURN_TRUE;
2932 }
2933 }
2934 Py_RETURN_FALSE;
2935 }
2936 result = _string_tailmatch(self, subobj, start, end, -1);
2937 if (result == -1)
2938 return NULL;
2939 else
2940 return PyBool_FromLong(result);
2941}
2942
2943
2944PyDoc_STRVAR(endswith__doc__,
2945"S.endswith(suffix[, start[, end]]) -> bool\n\
2946\n\
2947Return True if S ends with the specified suffix, False otherwise.\n\
2948With optional start, test S beginning at that position.\n\
2949With optional end, stop comparing S at that position.\n\
2950suffix can also be a tuple of strings to try.");
2951
2952static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002953string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002954{
2955 Py_ssize_t start = 0;
2956 Py_ssize_t end = PY_SSIZE_T_MAX;
2957 PyObject *subobj;
2958 int result;
2959
2960 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2961 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2962 return NULL;
2963 if (PyTuple_Check(subobj)) {
2964 Py_ssize_t i;
2965 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2966 result = _string_tailmatch(self,
2967 PyTuple_GET_ITEM(subobj, i),
2968 start, end, +1);
2969 if (result == -1)
2970 return NULL;
2971 else if (result) {
2972 Py_RETURN_TRUE;
2973 }
2974 }
2975 Py_RETURN_FALSE;
2976 }
2977 result = _string_tailmatch(self, subobj, start, end, +1);
2978 if (result == -1)
2979 return NULL;
2980 else
2981 return PyBool_FromLong(result);
2982}
2983
2984
2985PyDoc_STRVAR(encode__doc__,
2986"S.encode([encoding[,errors]]) -> object\n\
2987\n\
2988Encodes S using the codec registered for encoding. encoding defaults\n\
2989to the default encoding. errors may be given to set a different error\n\
2990handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2991a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2992'xmlcharrefreplace' as well as any other name registered with\n\
2993codecs.register_error that is able to handle UnicodeEncodeErrors.");
2994
2995static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00002996string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00002997{
Benjamin Peterson332d7212009-09-18 21:14:55 +00002998 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00002999 char *encoding = NULL;
3000 char *errors = NULL;
3001 PyObject *v;
3002
Benjamin Peterson332d7212009-09-18 21:14:55 +00003003 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3004 kwlist, &encoding, &errors))
Christian Heimes44720832008-05-26 13:01:01 +00003005 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003006 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003007 if (v == NULL)
3008 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003009 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003010 PyErr_Format(PyExc_TypeError,
3011 "encoder did not return a string/unicode object "
3012 "(type=%.400s)",
3013 Py_TYPE(v)->tp_name);
3014 Py_DECREF(v);
3015 return NULL;
3016 }
3017 return v;
3018
3019 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003020 return NULL;
3021}
3022
Christian Heimes44720832008-05-26 13:01:01 +00003023
3024PyDoc_STRVAR(decode__doc__,
3025"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003026\n\
Christian Heimes44720832008-05-26 13:01:01 +00003027Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003028to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003029handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3030a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003031as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003032able to handle UnicodeDecodeErrors.");
3033
3034static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003035string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003036{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003037 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003038 char *encoding = NULL;
3039 char *errors = NULL;
3040 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003041
Benjamin Peterson332d7212009-09-18 21:14:55 +00003042 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3043 kwlist, &encoding, &errors))
Christian Heimes1a6387e2008-03-26 12:49:49 +00003044 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003045 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003046 if (v == NULL)
3047 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003048 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003049 PyErr_Format(PyExc_TypeError,
3050 "decoder did not return a string/unicode object "
3051 "(type=%.400s)",
3052 Py_TYPE(v)->tp_name);
3053 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003054 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003055 }
3056 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003057
Christian Heimes44720832008-05-26 13:01:01 +00003058 onError:
3059 return NULL;
3060}
3061
3062
3063PyDoc_STRVAR(expandtabs__doc__,
3064"S.expandtabs([tabsize]) -> string\n\
3065\n\
3066Return a copy of S where all tab characters are expanded using spaces.\n\
3067If tabsize is not given, a tab size of 8 characters is assumed.");
3068
3069static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003070string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003071{
3072 const char *e, *p, *qe;
3073 char *q;
3074 Py_ssize_t i, j, incr;
3075 PyObject *u;
3076 int tabsize = 8;
3077
3078 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3079 return NULL;
3080
3081 /* First pass: determine size of output string */
3082 i = 0; /* chars up to and including most recent \n or \r */
3083 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003084 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3085 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003086 if (*p == '\t') {
3087 if (tabsize > 0) {
3088 incr = tabsize - (j % tabsize);
3089 if (j > PY_SSIZE_T_MAX - incr)
3090 goto overflow1;
3091 j += incr;
3092 }
3093 }
3094 else {
3095 if (j > PY_SSIZE_T_MAX - 1)
3096 goto overflow1;
3097 j++;
3098 if (*p == '\n' || *p == '\r') {
3099 if (i > PY_SSIZE_T_MAX - j)
3100 goto overflow1;
3101 i += j;
3102 j = 0;
3103 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003104 }
Christian Heimes44720832008-05-26 13:01:01 +00003105
3106 if (i > PY_SSIZE_T_MAX - j)
3107 goto overflow1;
3108
3109 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003110 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003111 if (!u)
3112 return NULL;
3113
3114 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003115 q = PyString_AS_STRING(u); /* next output char */
3116 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003117
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003118 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003119 if (*p == '\t') {
3120 if (tabsize > 0) {
3121 i = tabsize - (j % tabsize);
3122 j += i;
3123 while (i--) {
3124 if (q >= qe)
3125 goto overflow2;
3126 *q++ = ' ';
3127 }
3128 }
3129 }
3130 else {
3131 if (q >= qe)
3132 goto overflow2;
3133 *q++ = *p;
3134 j++;
3135 if (*p == '\n' || *p == '\r')
3136 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003137 }
Christian Heimes44720832008-05-26 13:01:01 +00003138
3139 return u;
3140
3141 overflow2:
3142 Py_DECREF(u);
3143 overflow1:
3144 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3145 return NULL;
3146}
3147
3148Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003149pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003150{
3151 PyObject *u;
3152
3153 if (left < 0)
3154 left = 0;
3155 if (right < 0)
3156 right = 0;
3157
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003158 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003159 Py_INCREF(self);
3160 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003161 }
3162
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003163 u = PyString_FromStringAndSize(NULL,
3164 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003165 if (u) {
3166 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003167 memset(PyString_AS_STRING(u), fill, left);
3168 Py_MEMCPY(PyString_AS_STRING(u) + left,
3169 PyString_AS_STRING(self),
3170 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003171 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003172 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003173 fill, right);
3174 }
3175
3176 return u;
3177}
3178
3179PyDoc_STRVAR(ljust__doc__,
3180"S.ljust(width[, fillchar]) -> string\n"
3181"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003182"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003183"done using the specified fill character (default is a space).");
3184
3185static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003186string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003187{
3188 Py_ssize_t width;
3189 char fillchar = ' ';
3190
3191 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3192 return NULL;
3193
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003194 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003195 Py_INCREF(self);
3196 return (PyObject*) self;
3197 }
3198
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003199 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003200}
3201
3202
3203PyDoc_STRVAR(rjust__doc__,
3204"S.rjust(width[, fillchar]) -> string\n"
3205"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003206"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003207"done using the specified fill character (default is a space)");
3208
3209static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003210string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003211{
3212 Py_ssize_t width;
3213 char fillchar = ' ';
3214
3215 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3216 return NULL;
3217
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003218 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003219 Py_INCREF(self);
3220 return (PyObject*) self;
3221 }
3222
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003223 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003224}
3225
3226
3227PyDoc_STRVAR(center__doc__,
3228"S.center(width[, fillchar]) -> string\n"
3229"\n"
3230"Return S centered in a string of length width. Padding is\n"
3231"done using the specified fill character (default is a space)");
3232
3233static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003234string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003235{
3236 Py_ssize_t marg, left;
3237 Py_ssize_t width;
3238 char fillchar = ' ';
3239
3240 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3241 return NULL;
3242
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003243 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003244 Py_INCREF(self);
3245 return (PyObject*) self;
3246 }
3247
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003248 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003249 left = marg / 2 + (marg & width & 1);
3250
3251 return pad(self, left, marg - left, fillchar);
3252}
3253
3254PyDoc_STRVAR(zfill__doc__,
3255"S.zfill(width) -> string\n"
3256"\n"
3257"Pad a numeric string S with zeros on the left, to fill a field\n"
3258"of the specified width. The string S is never truncated.");
3259
3260static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003261string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003262{
3263 Py_ssize_t fill;
3264 PyObject *s;
3265 char *p;
3266 Py_ssize_t width;
3267
3268 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3269 return NULL;
3270
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003271 if (PyString_GET_SIZE(self) >= width) {
3272 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003273 Py_INCREF(self);
3274 return (PyObject*) self;
3275 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003276 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003277 return PyString_FromStringAndSize(
3278 PyString_AS_STRING(self),
3279 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003280 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003281 }
3282
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003283 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003284
Christian Heimes44720832008-05-26 13:01:01 +00003285 s = pad(self, fill, 0, '0');
3286
3287 if (s == NULL)
3288 return NULL;
3289
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003290 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003291 if (p[fill] == '+' || p[fill] == '-') {
3292 /* move sign to beginning of string */
3293 p[0] = p[fill];
3294 p[fill] = '0';
3295 }
3296
3297 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003298}
3299
Christian Heimes44720832008-05-26 13:01:01 +00003300PyDoc_STRVAR(isspace__doc__,
3301"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003302\n\
Christian Heimes44720832008-05-26 13:01:01 +00003303Return True if all characters in S are whitespace\n\
3304and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003305
Christian Heimes44720832008-05-26 13:01:01 +00003306static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003307string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003308{
Christian Heimes44720832008-05-26 13:01:01 +00003309 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003310 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003311 register const unsigned char *e;
3312
3313 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003314 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003315 isspace(*p))
3316 return PyBool_FromLong(1);
3317
3318 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003319 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003320 return PyBool_FromLong(0);
3321
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003322 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003323 for (; p < e; p++) {
3324 if (!isspace(*p))
3325 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003326 }
Christian Heimes44720832008-05-26 13:01:01 +00003327 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003328}
3329
Christian Heimes44720832008-05-26 13:01:01 +00003330
3331PyDoc_STRVAR(isalpha__doc__,
3332"S.isalpha() -> bool\n\
3333\n\
3334Return True if all characters in S are alphabetic\n\
3335and there is at least one character in S, False otherwise.");
3336
3337static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003338string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003339{
Christian Heimes44720832008-05-26 13:01:01 +00003340 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003341 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003342 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003343
Christian Heimes44720832008-05-26 13:01:01 +00003344 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003345 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003346 isalpha(*p))
3347 return PyBool_FromLong(1);
3348
3349 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003350 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003351 return PyBool_FromLong(0);
3352
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003353 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003354 for (; p < e; p++) {
3355 if (!isalpha(*p))
3356 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003357 }
Christian Heimes44720832008-05-26 13:01:01 +00003358 return PyBool_FromLong(1);
3359}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003360
Christian Heimes44720832008-05-26 13:01:01 +00003361
3362PyDoc_STRVAR(isalnum__doc__,
3363"S.isalnum() -> bool\n\
3364\n\
3365Return True if all characters in S are alphanumeric\n\
3366and there is at least one character in S, False otherwise.");
3367
3368static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003369string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003370{
3371 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003372 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003373 register const unsigned char *e;
3374
3375 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003376 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003377 isalnum(*p))
3378 return PyBool_FromLong(1);
3379
3380 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003381 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003382 return PyBool_FromLong(0);
3383
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003384 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003385 for (; p < e; p++) {
3386 if (!isalnum(*p))
3387 return PyBool_FromLong(0);
3388 }
3389 return PyBool_FromLong(1);
3390}
3391
3392
3393PyDoc_STRVAR(isdigit__doc__,
3394"S.isdigit() -> bool\n\
3395\n\
3396Return True if all characters in S are digits\n\
3397and there is at least one character in S, False otherwise.");
3398
3399static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003400string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003401{
3402 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003403 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003404 register const unsigned char *e;
3405
3406 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003407 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003408 isdigit(*p))
3409 return PyBool_FromLong(1);
3410
3411 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003412 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003413 return PyBool_FromLong(0);
3414
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003415 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003416 for (; p < e; p++) {
3417 if (!isdigit(*p))
3418 return PyBool_FromLong(0);
3419 }
3420 return PyBool_FromLong(1);
3421}
3422
3423
3424PyDoc_STRVAR(islower__doc__,
3425"S.islower() -> bool\n\
3426\n\
3427Return True if all cased characters in S are lowercase and there is\n\
3428at least one cased character in S, False otherwise.");
3429
3430static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003431string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003432{
3433 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003434 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003435 register const unsigned char *e;
3436 int cased;
3437
3438 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003439 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003440 return PyBool_FromLong(islower(*p) != 0);
3441
3442 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003443 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003444 return PyBool_FromLong(0);
3445
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003446 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003447 cased = 0;
3448 for (; p < e; p++) {
3449 if (isupper(*p))
3450 return PyBool_FromLong(0);
3451 else if (!cased && islower(*p))
3452 cased = 1;
3453 }
3454 return PyBool_FromLong(cased);
3455}
3456
3457
3458PyDoc_STRVAR(isupper__doc__,
3459"S.isupper() -> bool\n\
3460\n\
3461Return True if all cased characters in S are uppercase and there is\n\
3462at least one cased character in S, False otherwise.");
3463
3464static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003465string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003466{
3467 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003468 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003469 register const unsigned char *e;
3470 int cased;
3471
3472 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003473 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003474 return PyBool_FromLong(isupper(*p) != 0);
3475
3476 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003477 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003478 return PyBool_FromLong(0);
3479
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003480 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003481 cased = 0;
3482 for (; p < e; p++) {
3483 if (islower(*p))
3484 return PyBool_FromLong(0);
3485 else if (!cased && isupper(*p))
3486 cased = 1;
3487 }
3488 return PyBool_FromLong(cased);
3489}
3490
3491
3492PyDoc_STRVAR(istitle__doc__,
3493"S.istitle() -> bool\n\
3494\n\
3495Return True if S is a titlecased string and there is at least one\n\
3496character in S, i.e. uppercase characters may only follow uncased\n\
3497characters and lowercase characters only cased ones. Return False\n\
3498otherwise.");
3499
3500static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003501string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003502{
3503 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003504 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003505 register const unsigned char *e;
3506 int cased, previous_is_cased;
3507
3508 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003509 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003510 return PyBool_FromLong(isupper(*p) != 0);
3511
3512 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003513 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003514 return PyBool_FromLong(0);
3515
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003516 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003517 cased = 0;
3518 previous_is_cased = 0;
3519 for (; p < e; p++) {
3520 register const unsigned char ch = *p;
3521
3522 if (isupper(ch)) {
3523 if (previous_is_cased)
3524 return PyBool_FromLong(0);
3525 previous_is_cased = 1;
3526 cased = 1;
3527 }
3528 else if (islower(ch)) {
3529 if (!previous_is_cased)
3530 return PyBool_FromLong(0);
3531 previous_is_cased = 1;
3532 cased = 1;
3533 }
3534 else
3535 previous_is_cased = 0;
3536 }
3537 return PyBool_FromLong(cased);
3538}
3539
3540
3541PyDoc_STRVAR(splitlines__doc__,
3542"S.splitlines([keepends]) -> list of strings\n\
3543\n\
3544Return a list of the lines in S, breaking at line boundaries.\n\
3545Line breaks are not included in the resulting list unless keepends\n\
3546is given and true.");
3547
3548static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003549string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003550{
Christian Heimes44720832008-05-26 13:01:01 +00003551 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003552
3553 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3554 return NULL;
3555
Antoine Pitrou64672132010-01-13 07:55:48 +00003556 return stringlib_splitlines(
3557 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3558 keepends
3559 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003560}
3561
Robert Schuppenies51df0642008-06-01 16:16:17 +00003562PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003563"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003564
3565static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003566string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003567{
3568 Py_ssize_t res;
Benjamin Peterson4fe03352009-09-17 21:33:46 +00003569 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00003570 return PyInt_FromSsize_t(res);
3571}
3572
Christian Heimes1a6387e2008-03-26 12:49:49 +00003573static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003574string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003575{
Christian Heimes44720832008-05-26 13:01:01 +00003576 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003577}
3578
Christian Heimes1a6387e2008-03-26 12:49:49 +00003579
Christian Heimes44720832008-05-26 13:01:01 +00003580#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003581
Christian Heimes44720832008-05-26 13:01:01 +00003582PyDoc_STRVAR(format__doc__,
3583"S.format(*args, **kwargs) -> unicode\n\
3584\n\
3585");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003586
Eric Smithdc13b792008-05-30 18:10:04 +00003587static PyObject *
3588string__format__(PyObject* self, PyObject* args)
3589{
3590 PyObject *format_spec;
3591 PyObject *result = NULL;
3592 PyObject *tmp = NULL;
3593
3594 /* If 2.x, convert format_spec to the same type as value */
3595 /* This is to allow things like u''.format('') */
3596 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3597 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003598 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00003599 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3600 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3601 goto done;
3602 }
3603 tmp = PyObject_Str(format_spec);
3604 if (tmp == NULL)
3605 goto done;
3606 format_spec = tmp;
3607
3608 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003609 PyString_AS_STRING(format_spec),
3610 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003611done:
3612 Py_XDECREF(tmp);
3613 return result;
3614}
3615
Christian Heimes44720832008-05-26 13:01:01 +00003616PyDoc_STRVAR(p_format__doc__,
3617"S.__format__(format_spec) -> unicode\n\
3618\n\
3619");
3620
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003621
Christian Heimes1a6387e2008-03-26 12:49:49 +00003622static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003623string_methods[] = {
3624 /* Counterparts of the obsolete stropmodule functions; except
3625 string.maketrans(). */
3626 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3627 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3628 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3629 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3630 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3631 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3632 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3633 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3634 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3635 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3636 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3637 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3638 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3639 capitalize__doc__},
3640 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3641 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3642 endswith__doc__},
3643 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3644 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3645 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3646 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3647 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3648 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3649 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3650 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3651 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3652 rpartition__doc__},
3653 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3654 startswith__doc__},
3655 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3656 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3657 swapcase__doc__},
3658 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3659 translate__doc__},
3660 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3661 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3662 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3663 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3664 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3665 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3666 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3667 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3668 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Benjamin Peterson332d7212009-09-18 21:14:55 +00003669 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3670 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00003671 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3672 expandtabs__doc__},
3673 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3674 splitlines__doc__},
Robert Schuppenies51df0642008-06-01 16:16:17 +00003675 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3676 sizeof__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00003677 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3678 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003679};
3680
3681static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003682str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003683
Christian Heimes44720832008-05-26 13:01:01 +00003684static PyObject *
3685string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3686{
3687 PyObject *x = NULL;
3688 static char *kwlist[] = {"object", 0};
3689
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003690 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00003691 return str_subtype_new(type, args, kwds);
3692 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3693 return NULL;
3694 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003695 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00003696 return PyObject_Str(x);
3697}
3698
3699static PyObject *
3700str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3701{
3702 PyObject *tmp, *pnew;
3703 Py_ssize_t n;
3704
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003705 assert(PyType_IsSubtype(type, &PyString_Type));
3706 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00003707 if (tmp == NULL)
3708 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003709 assert(PyString_CheckExact(tmp));
3710 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00003711 pnew = type->tp_alloc(type, n);
3712 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003713 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3714 ((PyStringObject *)pnew)->ob_shash =
3715 ((PyStringObject *)tmp)->ob_shash;
3716 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00003717 }
3718 Py_DECREF(tmp);
3719 return pnew;
3720}
3721
3722static PyObject *
3723basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3724{
3725 PyErr_SetString(PyExc_TypeError,
3726 "The basestring type cannot be instantiated");
3727 return NULL;
3728}
3729
3730static PyObject *
3731string_mod(PyObject *v, PyObject *w)
3732{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003733 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003734 Py_INCREF(Py_NotImplemented);
3735 return Py_NotImplemented;
3736 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003737 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003738}
3739
3740PyDoc_STRVAR(basestring_doc,
3741"Type basestring cannot be instantiated; it is the base for str and unicode.");
3742
3743static PyNumberMethods string_as_number = {
3744 0, /*nb_add*/
3745 0, /*nb_subtract*/
3746 0, /*nb_multiply*/
3747 0, /*nb_divide*/
3748 string_mod, /*nb_remainder*/
3749};
3750
3751
3752PyTypeObject PyBaseString_Type = {
3753 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3754 "basestring",
3755 0,
3756 0,
3757 0, /* tp_dealloc */
3758 0, /* tp_print */
3759 0, /* tp_getattr */
3760 0, /* tp_setattr */
3761 0, /* tp_compare */
3762 0, /* tp_repr */
3763 0, /* tp_as_number */
3764 0, /* tp_as_sequence */
3765 0, /* tp_as_mapping */
3766 0, /* tp_hash */
3767 0, /* tp_call */
3768 0, /* tp_str */
3769 0, /* tp_getattro */
3770 0, /* tp_setattro */
3771 0, /* tp_as_buffer */
3772 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3773 basestring_doc, /* tp_doc */
3774 0, /* tp_traverse */
3775 0, /* tp_clear */
3776 0, /* tp_richcompare */
3777 0, /* tp_weaklistoffset */
3778 0, /* tp_iter */
3779 0, /* tp_iternext */
3780 0, /* tp_methods */
3781 0, /* tp_members */
3782 0, /* tp_getset */
3783 &PyBaseObject_Type, /* tp_base */
3784 0, /* tp_dict */
3785 0, /* tp_descr_get */
3786 0, /* tp_descr_set */
3787 0, /* tp_dictoffset */
3788 0, /* tp_init */
3789 0, /* tp_alloc */
3790 basestring_new, /* tp_new */
3791 0, /* tp_free */
3792};
3793
3794PyDoc_STRVAR(string_doc,
3795"str(object) -> string\n\
3796\n\
3797Return a nice string representation of the object.\n\
3798If the argument is a string, the return value is the same object.");
3799
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003800PyTypeObject PyString_Type = {
Christian Heimes44720832008-05-26 13:01:01 +00003801 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3802 "str",
Mark Dickinson826f3fe2008-12-05 21:55:28 +00003803 PyStringObject_SIZE,
Christian Heimes44720832008-05-26 13:01:01 +00003804 sizeof(char),
3805 string_dealloc, /* tp_dealloc */
3806 (printfunc)string_print, /* tp_print */
3807 0, /* tp_getattr */
3808 0, /* tp_setattr */
3809 0, /* tp_compare */
3810 string_repr, /* tp_repr */
3811 &string_as_number, /* tp_as_number */
3812 &string_as_sequence, /* tp_as_sequence */
3813 &string_as_mapping, /* tp_as_mapping */
3814 (hashfunc)string_hash, /* tp_hash */
3815 0, /* tp_call */
3816 string_str, /* tp_str */
3817 PyObject_GenericGetAttr, /* tp_getattro */
3818 0, /* tp_setattro */
3819 &string_as_buffer, /* tp_as_buffer */
3820 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3821 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3822 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3823 string_doc, /* tp_doc */
3824 0, /* tp_traverse */
3825 0, /* tp_clear */
3826 (richcmpfunc)string_richcompare, /* tp_richcompare */
3827 0, /* tp_weaklistoffset */
3828 0, /* tp_iter */
3829 0, /* tp_iternext */
3830 string_methods, /* tp_methods */
3831 0, /* tp_members */
3832 0, /* tp_getset */
3833 &PyBaseString_Type, /* tp_base */
3834 0, /* tp_dict */
3835 0, /* tp_descr_get */
3836 0, /* tp_descr_set */
3837 0, /* tp_dictoffset */
3838 0, /* tp_init */
3839 0, /* tp_alloc */
3840 string_new, /* tp_new */
3841 PyObject_Del, /* tp_free */
3842};
3843
3844void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003845PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003846{
3847 register PyObject *v;
3848 if (*pv == NULL)
3849 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003850 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00003851 Py_DECREF(*pv);
3852 *pv = NULL;
3853 return;
3854 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003855 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00003856 Py_DECREF(*pv);
3857 *pv = v;
3858}
3859
3860void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003861PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003862{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003863 PyString_Concat(pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00003864 Py_XDECREF(w);
3865}
3866
3867
3868/* The following function breaks the notion that strings are immutable:
3869 it changes the size of a string. We get away with this only if there
3870 is only one module referencing the object. You can also think of it
3871 as creating a new string object and destroying the old one, only
3872 more efficiently. In any case, don't use this if the string may
3873 already be known to some other part of the code...
3874 Note that if there's not enough memory to resize the string, the original
3875 string object at *pv is deallocated, *pv is set to NULL, an "out of
3876 memory" exception is set, and -1 is returned. Else (on success) 0 is
3877 returned, and the value in *pv may or may not be the same as on input.
3878 As always, an extra byte is allocated for a trailing \0 byte (newsize
3879 does *not* include that), and a trailing \0 byte is stored.
3880*/
3881
3882int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003883_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003884{
3885 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003886 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00003887 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003888 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3889 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003890 *pv = 0;
3891 Py_DECREF(v);
3892 PyErr_BadInternalCall();
3893 return -1;
3894 }
3895 /* XXX UNREF/NEWREF interface should be more symmetrical */
3896 _Py_DEC_REFTOTAL;
3897 _Py_ForgetReference(v);
3898 *pv = (PyObject *)
Mark Dickinson826f3fe2008-12-05 21:55:28 +00003899 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00003900 if (*pv == NULL) {
3901 PyObject_Del(v);
3902 PyErr_NoMemory();
3903 return -1;
3904 }
3905 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003906 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00003907 Py_SIZE(sv) = newsize;
3908 sv->ob_sval[newsize] = '\0';
3909 sv->ob_shash = -1; /* invalidate cached hash value */
3910 return 0;
3911}
3912
3913/* Helpers for formatstring */
3914
3915Py_LOCAL_INLINE(PyObject *)
3916getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3917{
3918 Py_ssize_t argidx = *p_argidx;
3919 if (argidx < arglen) {
3920 (*p_argidx)++;
3921 if (arglen < 0)
3922 return args;
3923 else
3924 return PyTuple_GetItem(args, argidx);
3925 }
3926 PyErr_SetString(PyExc_TypeError,
3927 "not enough arguments for format string");
3928 return NULL;
3929}
3930
/* Format flag bits, one per flag character:
 *   '-'  F_LJUST
 *   '+'  F_SIGN
 *   ' '  F_BLANK
 *   '#'  F_ALT
 *   '0'  F_ZERO
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
3943
Mark Dickinson18cfada2009-11-23 18:46:41 +00003944/* Returns a new reference to a PyString object, or NULL on failure. */
3945
3946static PyObject *
3947formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003948{
Mark Dickinson18cfada2009-11-23 18:46:41 +00003949 char *p;
3950 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003951 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003952
Christian Heimes44720832008-05-26 13:01:01 +00003953 x = PyFloat_AsDouble(v);
3954 if (x == -1.0 && PyErr_Occurred()) {
3955 PyErr_Format(PyExc_TypeError, "float argument required, "
3956 "not %.200s", Py_TYPE(v)->tp_name);
Mark Dickinson18cfada2009-11-23 18:46:41 +00003957 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003958 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003959
Christian Heimes44720832008-05-26 13:01:01 +00003960 if (prec < 0)
3961 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003962
Mark Dickinson18cfada2009-11-23 18:46:41 +00003963 p = PyOS_double_to_string(x, type, prec,
3964 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003965
Mark Dickinson18cfada2009-11-23 18:46:41 +00003966 if (p == NULL)
3967 return NULL;
3968 result = PyString_FromStringAndSize(p, strlen(p));
3969 PyMem_Free(p);
3970 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003971}
3972
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003973/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003974 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3975 * Python's regular ints.
3976 * Return value: a new PyString*, or NULL if error.
3977 * . *pbuf is set to point into it,
3978 * *plen set to the # of chars following that.
3979 * Caller must decref it when done using pbuf.
3980 * The string starting at *pbuf is of the form
3981 * "-"? ("0x" | "0X")? digit+
3982 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3983 * set in flags. The case of hex digits will be correct,
3984 * There will be at least prec digits, zero-filled on the left if
3985 * necessary to get that many.
3986 * val object to be converted
3987 * flags bitmask of format flags; only F_ALT is looked at
3988 * prec minimum number of digits; 0-fill on left if needed
3989 * type a character in [duoxX]; u acts the same as d
3990 *
3991 * CAUTION: o, x and X conversions on regular ints can never
3992 * produce a '-' sign, but can for Python's unbounded ints.
3993 */
3994PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003995_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00003996 char **pbuf, int *plen)
3997{
3998 PyObject *result = NULL;
3999 char *buf;
4000 Py_ssize_t i;
4001 int sign; /* 1 if '-', else 0 */
4002 int len; /* number of characters */
4003 Py_ssize_t llen;
4004 int numdigits; /* len == numnondigits + numdigits */
4005 int numnondigits = 0;
4006
4007 switch (type) {
4008 case 'd':
4009 case 'u':
4010 result = Py_TYPE(val)->tp_str(val);
4011 break;
4012 case 'o':
4013 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4014 break;
4015 case 'x':
4016 case 'X':
4017 numnondigits = 2;
4018 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4019 break;
4020 default:
4021 assert(!"'type' not in [duoxX]");
4022 }
4023 if (!result)
4024 return NULL;
4025
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004026 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004027 if (!buf) {
4028 Py_DECREF(result);
4029 return NULL;
4030 }
4031
4032 /* To modify the string in-place, there can only be one reference. */
4033 if (Py_REFCNT(result) != 1) {
4034 PyErr_BadInternalCall();
4035 return NULL;
4036 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004037 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004038 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004039 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004040 return NULL;
4041 }
4042 len = (int)llen;
4043 if (buf[len-1] == 'L') {
4044 --len;
4045 buf[len] = '\0';
4046 }
4047 sign = buf[0] == '-';
4048 numnondigits += sign;
4049 numdigits = len - numnondigits;
4050 assert(numdigits > 0);
4051
4052 /* Get rid of base marker unless F_ALT */
4053 if ((flags & F_ALT) == 0) {
4054 /* Need to skip 0x, 0X or 0. */
4055 int skipped = 0;
4056 switch (type) {
4057 case 'o':
4058 assert(buf[sign] == '0');
4059 /* If 0 is only digit, leave it alone. */
4060 if (numdigits > 1) {
4061 skipped = 1;
4062 --numdigits;
4063 }
4064 break;
4065 case 'x':
4066 case 'X':
4067 assert(buf[sign] == '0');
4068 assert(buf[sign + 1] == 'x');
4069 skipped = 2;
4070 numnondigits -= 2;
4071 break;
4072 }
4073 if (skipped) {
4074 buf += skipped;
4075 len -= skipped;
4076 if (sign)
4077 buf[0] = '-';
4078 }
4079 assert(len == numnondigits + numdigits);
4080 assert(numdigits > 0);
4081 }
4082
4083 /* Fill with leading zeroes to meet minimum width. */
4084 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004085 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004086 numnondigits + prec);
4087 char *b1;
4088 if (!r1) {
4089 Py_DECREF(result);
4090 return NULL;
4091 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004092 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004093 for (i = 0; i < numnondigits; ++i)
4094 *b1++ = *buf++;
4095 for (i = 0; i < prec - numdigits; i++)
4096 *b1++ = '0';
4097 for (i = 0; i < numdigits; i++)
4098 *b1++ = *buf++;
4099 *b1 = '\0';
4100 Py_DECREF(result);
4101 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004102 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004103 len = numnondigits + prec;
4104 }
4105
4106 /* Fix up case for hex conversions. */
4107 if (type == 'X') {
4108 /* Need to convert all lower case letters to upper case.
4109 and need to convert 0x to 0X (and -0x to -0X). */
4110 for (i = 0; i < len; i++)
4111 if (buf[i] >= 'a' && buf[i] <= 'x')
4112 buf[i] -= 'a'-'A';
4113 }
4114 *pbuf = buf;
4115 *plen = len;
4116 return result;
4117}
4118
4119Py_LOCAL_INLINE(int)
4120formatint(char *buf, size_t buflen, int flags,
4121 int prec, int type, PyObject *v)
4122{
4123 /* fmt = '%#.' + `prec` + 'l' + `type`
4124 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4125 + 1 + 1 = 24 */
4126 char fmt[64]; /* plenty big enough! */
4127 char *sign;
4128 long x;
4129
4130 x = PyInt_AsLong(v);
4131 if (x == -1 && PyErr_Occurred()) {
4132 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4133 Py_TYPE(v)->tp_name);
4134 return -1;
4135 }
4136 if (x < 0 && type == 'u') {
4137 type = 'd';
4138 }
4139 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4140 sign = "-";
4141 else
4142 sign = "";
4143 if (prec < 0)
4144 prec = 1;
4145
4146 if ((flags & F_ALT) &&
4147 (type == 'x' || type == 'X')) {
4148 /* When converting under %#x or %#X, there are a number
4149 * of issues that cause pain:
4150 * - when 0 is being converted, the C standard leaves off
4151 * the '0x' or '0X', which is inconsistent with other
4152 * %#x/%#X conversions and inconsistent with Python's
4153 * hex() function
4154 * - there are platforms that violate the standard and
4155 * convert 0 with the '0x' or '0X'
4156 * (Metrowerks, Compaq Tru64)
4157 * - there are platforms that give '0x' when converting
4158 * under %#X, but convert 0 in accordance with the
4159 * standard (OS/2 EMX)
4160 *
4161 * We can achieve the desired consistency by inserting our
4162 * own '0x' or '0X' prefix, and substituting %x/%X in place
4163 * of %#x/%#X.
4164 *
4165 * Note that this is the same approach as used in
4166 * formatint() in unicodeobject.c
4167 */
4168 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4169 sign, type, prec, type);
4170 }
4171 else {
4172 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4173 sign, (flags&F_ALT) ? "#" : "",
4174 prec, type);
4175 }
4176
4177 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4178 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4179 */
4180 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4181 PyErr_SetString(PyExc_OverflowError,
4182 "formatted integer is too long (precision too large?)");
4183 return -1;
4184 }
4185 if (sign[0])
4186 PyOS_snprintf(buf, buflen, fmt, -x);
4187 else
4188 PyOS_snprintf(buf, buflen, fmt, x);
4189 return (int)strlen(buf);
4190}
4191
4192Py_LOCAL_INLINE(int)
4193formatchar(char *buf, size_t buflen, PyObject *v)
4194{
4195 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004196 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004197 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4198 return -1;
4199 }
4200 else {
4201 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4202 return -1;
4203 }
4204 buf[1] = '\0';
4205 return 1;
4206}
4207
4208/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4209
Mark Dickinson18cfada2009-11-23 18:46:41 +00004210 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004211 chars are formatted. XXX This is a magic number. Each formatting
4212 routine does bounds checking to ensure no overflow, but a better
4213 solution may be to malloc a buffer of appropriate size for each
4214 format. For now, the current solution is sufficient.
4215*/
4216#define FORMATBUFLEN (size_t)120
4217
4218PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004219PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004220{
4221 char *fmt, *res;
4222 Py_ssize_t arglen, argidx;
4223 Py_ssize_t reslen, rescnt, fmtcnt;
4224 int args_owned = 0;
4225 PyObject *result, *orig_args;
4226#ifdef Py_USING_UNICODE
4227 PyObject *v, *w;
4228#endif
4229 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004230 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004231 PyErr_BadInternalCall();
4232 return NULL;
4233 }
4234 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004235 fmt = PyString_AS_STRING(format);
4236 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004237 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004238 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004239 if (result == NULL)
4240 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004241 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004242 if (PyTuple_Check(args)) {
4243 arglen = PyTuple_GET_SIZE(args);
4244 argidx = 0;
4245 }
4246 else {
4247 arglen = -1;
4248 argidx = -2;
4249 }
4250 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4251 !PyObject_TypeCheck(args, &PyBaseString_Type))
4252 dict = args;
4253 while (--fmtcnt >= 0) {
4254 if (*fmt != '%') {
4255 if (--rescnt < 0) {
4256 rescnt = fmtcnt + 100;
4257 reslen += rescnt;
Benjamin Peterson6caf7ff2010-04-02 23:59:41 +00004258 if (_PyString_Resize(&result, reslen))
Christian Heimes44720832008-05-26 13:01:01 +00004259 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004260 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004261 + reslen - rescnt;
4262 --rescnt;
4263 }
4264 *res++ = *fmt++;
4265 }
4266 else {
4267 /* Got a format specifier */
4268 int flags = 0;
4269 Py_ssize_t width = -1;
4270 int prec = -1;
4271 int c = '\0';
4272 int fill;
4273 int isnumok;
4274 PyObject *v = NULL;
4275 PyObject *temp = NULL;
4276 char *pbuf;
4277 int sign;
4278 Py_ssize_t len;
4279 char formatbuf[FORMATBUFLEN];
Mark Dickinson18cfada2009-11-23 18:46:41 +00004280 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004281#ifdef Py_USING_UNICODE
4282 char *fmt_start = fmt;
4283 Py_ssize_t argidx_start = argidx;
4284#endif
4285
4286 fmt++;
4287 if (*fmt == '(') {
4288 char *keystart;
4289 Py_ssize_t keylen;
4290 PyObject *key;
4291 int pcount = 1;
4292
4293 if (dict == NULL) {
4294 PyErr_SetString(PyExc_TypeError,
4295 "format requires a mapping");
4296 goto error;
4297 }
4298 ++fmt;
4299 --fmtcnt;
4300 keystart = fmt;
4301 /* Skip over balanced parentheses */
4302 while (pcount > 0 && --fmtcnt >= 0) {
4303 if (*fmt == ')')
4304 --pcount;
4305 else if (*fmt == '(')
4306 ++pcount;
4307 fmt++;
4308 }
4309 keylen = fmt - keystart - 1;
4310 if (fmtcnt < 0 || pcount > 0) {
4311 PyErr_SetString(PyExc_ValueError,
4312 "incomplete format key");
4313 goto error;
4314 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004315 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004316 keylen);
4317 if (key == NULL)
4318 goto error;
4319 if (args_owned) {
4320 Py_DECREF(args);
4321 args_owned = 0;
4322 }
4323 args = PyObject_GetItem(dict, key);
4324 Py_DECREF(key);
4325 if (args == NULL) {
4326 goto error;
4327 }
4328 args_owned = 1;
4329 arglen = -1;
4330 argidx = -2;
4331 }
4332 while (--fmtcnt >= 0) {
4333 switch (c = *fmt++) {
4334 case '-': flags |= F_LJUST; continue;
4335 case '+': flags |= F_SIGN; continue;
4336 case ' ': flags |= F_BLANK; continue;
4337 case '#': flags |= F_ALT; continue;
4338 case '0': flags |= F_ZERO; continue;
4339 }
4340 break;
4341 }
4342 if (c == '*') {
4343 v = getnextarg(args, arglen, &argidx);
4344 if (v == NULL)
4345 goto error;
4346 if (!PyInt_Check(v)) {
4347 PyErr_SetString(PyExc_TypeError,
4348 "* wants int");
4349 goto error;
4350 }
4351 width = PyInt_AsLong(v);
4352 if (width < 0) {
4353 flags |= F_LJUST;
4354 width = -width;
4355 }
4356 if (--fmtcnt >= 0)
4357 c = *fmt++;
4358 }
4359 else if (c >= 0 && isdigit(c)) {
4360 width = c - '0';
4361 while (--fmtcnt >= 0) {
4362 c = Py_CHARMASK(*fmt++);
4363 if (!isdigit(c))
4364 break;
4365 if ((width*10) / 10 != width) {
4366 PyErr_SetString(
4367 PyExc_ValueError,
4368 "width too big");
4369 goto error;
4370 }
4371 width = width*10 + (c - '0');
4372 }
4373 }
4374 if (c == '.') {
4375 prec = 0;
4376 if (--fmtcnt >= 0)
4377 c = *fmt++;
4378 if (c == '*') {
4379 v = getnextarg(args, arglen, &argidx);
4380 if (v == NULL)
4381 goto error;
4382 if (!PyInt_Check(v)) {
4383 PyErr_SetString(
4384 PyExc_TypeError,
4385 "* wants int");
4386 goto error;
4387 }
4388 prec = PyInt_AsLong(v);
4389 if (prec < 0)
4390 prec = 0;
4391 if (--fmtcnt >= 0)
4392 c = *fmt++;
4393 }
4394 else if (c >= 0 && isdigit(c)) {
4395 prec = c - '0';
4396 while (--fmtcnt >= 0) {
4397 c = Py_CHARMASK(*fmt++);
4398 if (!isdigit(c))
4399 break;
4400 if ((prec*10) / 10 != prec) {
4401 PyErr_SetString(
4402 PyExc_ValueError,
4403 "prec too big");
4404 goto error;
4405 }
4406 prec = prec*10 + (c - '0');
4407 }
4408 }
4409 } /* prec */
4410 if (fmtcnt >= 0) {
4411 if (c == 'h' || c == 'l' || c == 'L') {
4412 if (--fmtcnt >= 0)
4413 c = *fmt++;
4414 }
4415 }
4416 if (fmtcnt < 0) {
4417 PyErr_SetString(PyExc_ValueError,
4418 "incomplete format");
4419 goto error;
4420 }
4421 if (c != '%') {
4422 v = getnextarg(args, arglen, &argidx);
4423 if (v == NULL)
4424 goto error;
4425 }
4426 sign = 0;
4427 fill = ' ';
4428 switch (c) {
4429 case '%':
4430 pbuf = "%";
4431 len = 1;
4432 break;
4433 case 's':
4434#ifdef Py_USING_UNICODE
4435 if (PyUnicode_Check(v)) {
4436 fmt = fmt_start;
4437 argidx = argidx_start;
4438 goto unicode;
4439 }
4440#endif
4441 temp = _PyObject_Str(v);
4442#ifdef Py_USING_UNICODE
4443 if (temp != NULL && PyUnicode_Check(temp)) {
4444 Py_DECREF(temp);
4445 fmt = fmt_start;
4446 argidx = argidx_start;
4447 goto unicode;
4448 }
4449#endif
4450 /* Fall through */
4451 case 'r':
4452 if (c == 'r')
4453 temp = PyObject_Repr(v);
4454 if (temp == NULL)
4455 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004456 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004457 PyErr_SetString(PyExc_TypeError,
4458 "%s argument has non-string str()");
4459 Py_DECREF(temp);
4460 goto error;
4461 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004462 pbuf = PyString_AS_STRING(temp);
4463 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004464 if (prec >= 0 && len > prec)
4465 len = prec;
4466 break;
4467 case 'i':
4468 case 'd':
4469 case 'u':
4470 case 'o':
4471 case 'x':
4472 case 'X':
4473 if (c == 'i')
4474 c = 'd';
4475 isnumok = 0;
4476 if (PyNumber_Check(v)) {
4477 PyObject *iobj=NULL;
4478
4479 if (PyInt_Check(v) || (PyLong_Check(v))) {
4480 iobj = v;
4481 Py_INCREF(iobj);
4482 }
4483 else {
4484 iobj = PyNumber_Int(v);
4485 if (iobj==NULL) iobj = PyNumber_Long(v);
4486 }
4487 if (iobj!=NULL) {
4488 if (PyInt_Check(iobj)) {
4489 isnumok = 1;
4490 pbuf = formatbuf;
4491 len = formatint(pbuf,
4492 sizeof(formatbuf),
4493 flags, prec, c, iobj);
4494 Py_DECREF(iobj);
4495 if (len < 0)
4496 goto error;
4497 sign = 1;
4498 }
4499 else if (PyLong_Check(iobj)) {
4500 int ilen;
4501
4502 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004503 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004504 prec, c, &pbuf, &ilen);
4505 Py_DECREF(iobj);
4506 len = ilen;
4507 if (!temp)
4508 goto error;
4509 sign = 1;
4510 }
4511 else {
4512 Py_DECREF(iobj);
4513 }
4514 }
4515 }
4516 if (!isnumok) {
4517 PyErr_Format(PyExc_TypeError,
4518 "%%%c format: a number is required, "
4519 "not %.200s", c, Py_TYPE(v)->tp_name);
4520 goto error;
4521 }
4522 if (flags & F_ZERO)
4523 fill = '0';
4524 break;
4525 case 'e':
4526 case 'E':
4527 case 'f':
4528 case 'F':
4529 case 'g':
4530 case 'G':
Mark Dickinson18cfada2009-11-23 18:46:41 +00004531 temp = formatfloat(v, flags, prec, c);
4532 if (temp == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00004533 goto error;
Mark Dickinson18cfada2009-11-23 18:46:41 +00004534 pbuf = PyString_AS_STRING(temp);
4535 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004536 sign = 1;
4537 if (flags & F_ZERO)
4538 fill = '0';
4539 break;
4540 case 'c':
4541#ifdef Py_USING_UNICODE
4542 if (PyUnicode_Check(v)) {
4543 fmt = fmt_start;
4544 argidx = argidx_start;
4545 goto unicode;
4546 }
4547#endif
4548 pbuf = formatbuf;
4549 len = formatchar(pbuf, sizeof(formatbuf), v);
4550 if (len < 0)
4551 goto error;
4552 break;
4553 default:
4554 PyErr_Format(PyExc_ValueError,
4555 "unsupported format character '%c' (0x%x) "
4556 "at index %zd",
4557 c, c,
4558 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004559 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00004560 goto error;
4561 }
4562 if (sign) {
4563 if (*pbuf == '-' || *pbuf == '+') {
4564 sign = *pbuf++;
4565 len--;
4566 }
4567 else if (flags & F_SIGN)
4568 sign = '+';
4569 else if (flags & F_BLANK)
4570 sign = ' ';
4571 else
4572 sign = 0;
4573 }
4574 if (width < len)
4575 width = len;
4576 if (rescnt - (sign != 0) < width) {
4577 reslen -= rescnt;
4578 rescnt = width + fmtcnt + 100;
4579 reslen += rescnt;
4580 if (reslen < 0) {
4581 Py_DECREF(result);
4582 Py_XDECREF(temp);
4583 return PyErr_NoMemory();
4584 }
Benjamin Peterson6caf7ff2010-04-02 23:59:41 +00004585 if (_PyString_Resize(&result, reslen)) {
Christian Heimes44720832008-05-26 13:01:01 +00004586 Py_XDECREF(temp);
4587 return NULL;
4588 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004589 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004590 + reslen - rescnt;
4591 }
4592 if (sign) {
4593 if (fill != ' ')
4594 *res++ = sign;
4595 rescnt--;
4596 if (width > len)
4597 width--;
4598 }
4599 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4600 assert(pbuf[0] == '0');
4601 assert(pbuf[1] == c);
4602 if (fill != ' ') {
4603 *res++ = *pbuf++;
4604 *res++ = *pbuf++;
4605 }
4606 rescnt -= 2;
4607 width -= 2;
4608 if (width < 0)
4609 width = 0;
4610 len -= 2;
4611 }
4612 if (width > len && !(flags & F_LJUST)) {
4613 do {
4614 --rescnt;
4615 *res++ = fill;
4616 } while (--width > len);
4617 }
4618 if (fill == ' ') {
4619 if (sign)
4620 *res++ = sign;
4621 if ((flags & F_ALT) &&
4622 (c == 'x' || c == 'X')) {
4623 assert(pbuf[0] == '0');
4624 assert(pbuf[1] == c);
4625 *res++ = *pbuf++;
4626 *res++ = *pbuf++;
4627 }
4628 }
4629 Py_MEMCPY(res, pbuf, len);
4630 res += len;
4631 rescnt -= len;
4632 while (--width >= len) {
4633 --rescnt;
4634 *res++ = ' ';
4635 }
4636 if (dict && (argidx < arglen) && c != '%') {
4637 PyErr_SetString(PyExc_TypeError,
4638 "not all arguments converted during string formatting");
4639 Py_XDECREF(temp);
4640 goto error;
4641 }
4642 Py_XDECREF(temp);
4643 } /* '%' */
4644 } /* until end */
4645 if (argidx < arglen && !dict) {
4646 PyErr_SetString(PyExc_TypeError,
4647 "not all arguments converted during string formatting");
4648 goto error;
4649 }
4650 if (args_owned) {
4651 Py_DECREF(args);
4652 }
Benjamin Peterson6caf7ff2010-04-02 23:59:41 +00004653 if (_PyString_Resize(&result, reslen - rescnt))
4654 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004655 return result;
4656
4657#ifdef Py_USING_UNICODE
4658 unicode:
4659 if (args_owned) {
4660 Py_DECREF(args);
4661 args_owned = 0;
4662 }
4663 /* Fiddle args right (remove the first argidx arguments) */
4664 if (PyTuple_Check(orig_args) && argidx > 0) {
4665 PyObject *v;
4666 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4667 v = PyTuple_New(n);
4668 if (v == NULL)
4669 goto error;
4670 while (--n >= 0) {
4671 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4672 Py_INCREF(w);
4673 PyTuple_SET_ITEM(v, n, w);
4674 }
4675 args = v;
4676 } else {
4677 Py_INCREF(orig_args);
4678 args = orig_args;
4679 }
4680 args_owned = 1;
4681 /* Take what we have of the result and let the Unicode formatting
4682 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004683 rescnt = res - PyString_AS_STRING(result);
4684 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00004685 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004686 fmtcnt = PyString_GET_SIZE(format) - \
4687 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00004688 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4689 if (format == NULL)
4690 goto error;
4691 v = PyUnicode_Format(format, args);
4692 Py_DECREF(format);
4693 if (v == NULL)
4694 goto error;
4695 /* Paste what we have (result) to what the Unicode formatting
4696 function returned (v) and return the result (or error) */
4697 w = PyUnicode_Concat(result, v);
4698 Py_DECREF(result);
4699 Py_DECREF(v);
4700 Py_DECREF(args);
4701 return w;
4702#endif /* Py_USING_UNICODE */
4703
4704 error:
4705 Py_DECREF(result);
4706 if (args_owned) {
4707 Py_DECREF(args);
4708 }
4709 return NULL;
4710}
4711
4712void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004713PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004714{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004715 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00004716 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004717 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004718 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00004719 /* If it's a string subclass, we don't really know what putting
4720 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004721 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00004722 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004723 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00004724 return;
4725 if (interned == NULL) {
4726 interned = PyDict_New();
4727 if (interned == NULL) {
4728 PyErr_Clear(); /* Don't leave an exception */
4729 return;
4730 }
4731 }
4732 t = PyDict_GetItem(interned, (PyObject *)s);
4733 if (t) {
4734 Py_INCREF(t);
4735 Py_DECREF(*p);
4736 *p = t;
4737 return;
4738 }
4739
4740 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4741 PyErr_Clear();
4742 return;
4743 }
4744 /* The two references in interned are not counted by refcnt.
4745 The string deallocator will take care of this */
4746 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004747 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004748}
4749
4750void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004751PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004752{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004753 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004754 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4755 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004756 Py_INCREF(*p);
4757 }
4758}
4759
4760
4761PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004762PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004763{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004764 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00004765 if (s == NULL)
4766 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004767 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00004768 return s;
4769}
4770
4771void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004772PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004773{
4774 int i;
4775 for (i = 0; i < UCHAR_MAX + 1; i++) {
4776 Py_XDECREF(characters[i]);
4777 characters[i] = NULL;
4778 }
4779 Py_XDECREF(nullstring);
4780 nullstring = NULL;
4781}
4782
4783void _Py_ReleaseInternedStrings(void)
4784{
4785 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004786 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00004787 Py_ssize_t i, n;
4788 Py_ssize_t immortal_size = 0, mortal_size = 0;
4789
4790 if (interned == NULL || !PyDict_Check(interned))
4791 return;
4792 keys = PyDict_Keys(interned);
4793 if (keys == NULL || !PyList_Check(keys)) {
4794 PyErr_Clear();
4795 return;
4796 }
4797
4798 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4799 detector, interned strings are not forcibly deallocated; rather, we
4800 give them their stolen references back, and then clear and DECREF
4801 the interned dict. */
4802
4803 n = PyList_GET_SIZE(keys);
4804 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4805 n);
4806 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004807 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00004808 switch (s->ob_sstate) {
4809 case SSTATE_NOT_INTERNED:
4810 /* XXX Shouldn't happen */
4811 break;
4812 case SSTATE_INTERNED_IMMORTAL:
4813 Py_REFCNT(s) += 1;
4814 immortal_size += Py_SIZE(s);
4815 break;
4816 case SSTATE_INTERNED_MORTAL:
4817 Py_REFCNT(s) += 2;
4818 mortal_size += Py_SIZE(s);
4819 break;
4820 default:
4821 Py_FatalError("Inconsistent interned string state.");
4822 }
4823 s->ob_sstate = SSTATE_NOT_INTERNED;
4824 }
4825 fprintf(stderr, "total size of all interned strings: "
4826 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4827 "mortal/immortal\n", mortal_size, immortal_size);
4828 Py_DECREF(keys);
4829 PyDict_Clear(interned);
4830 Py_DECREF(interned);
4831 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004832}