blob: 43ef3fa0b6cee840369c73d81552495ed41dba05 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
60PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000061PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000063 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000064 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000066 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000067 return NULL;
68 }
69 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
Mark Dickinson826f3fe2008-12-05 21:55:28 +000086 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +000087 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
89 }
90
Christian Heimes44720832008-05-26 13:01:01 +000091 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +000092 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +000093 if (op == NULL)
94 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000095 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000096 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000104 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000105 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000110 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000111 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
114 }
115 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116}
117
Christian Heimes44720832008-05-26 13:01:01 +0000118PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000119PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000120{
Christian Heimes44720832008-05-26 13:01:01 +0000121 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000122 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000123
124 assert(str != NULL);
125 size = strlen(str);
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Christian Heimes44720832008-05-26 13:01:01 +0000127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
131 if (size == 0 && (op = nullstring) != NULL) {
132#ifdef COUNT_ALLOCS
133 null_strings++;
134#endif
135 Py_INCREF(op);
136 return (PyObject *)op;
137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139#ifdef COUNT_ALLOCS
140 one_strings++;
141#endif
142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145
146 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +0000148 if (op == NULL)
149 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000150 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000157 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000158 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000163 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000164 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169}
170
Christian Heimes44720832008-05-26 13:01:01 +0000171PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000172PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000173{
Christian Heimes44720832008-05-26 13:01:01 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000179
Christian Heimes44720832008-05-26 13:01:01 +0000180#ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count, vargs, sizeof(va_list));
182#else
183#ifdef __va_copy
184 __va_copy(count, vargs);
185#else
186 count = vargs;
187#endif
188#endif
189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000192#ifdef HAVE_LONG_LONG
193 int longlongflag = 0;
194#endif
Christian Heimes44720832008-05-26 13:01:01 +0000195 const char* p = f;
196 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
197 ;
198
199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
201 */
Mark Dickinson82864d12009-11-15 16:18:58 +0000202 if (*f == 'l') {
203 if (f[1] == 'd' || f[1] == 'u') {
204 ++f;
205 }
206#ifdef HAVE_LONG_LONG
207 else if (f[1] == 'l' &&
208 (f[2] == 'd' || f[2] == 'u')) {
209 longlongflag = 1;
210 f += 2;
211 }
212#endif
213 }
214 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Christian Heimes44720832008-05-26 13:01:01 +0000215 ++f;
Mark Dickinson82864d12009-11-15 16:18:58 +0000216 }
Christian Heimes44720832008-05-26 13:01:01 +0000217
218 switch (*f) {
219 case 'c':
220 (void)va_arg(count, int);
221 /* fall through... */
222 case '%':
223 n++;
224 break;
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000227#ifdef HAVE_LONG_LONG
228 /* Need at most
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
232 if (longlongflag)
233 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
234 else
235#endif
236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
239 octal. */
240 n += 20;
241
Christian Heimes44720832008-05-26 13:01:01 +0000242 break;
243 case 's':
244 s = va_arg(count, char*);
245 n += strlen(s);
246 break;
247 case 'p':
248 (void) va_arg(count, int);
249 /* maximum 64-bit pointer representation:
250 * 0xffffffffffffffff
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
253 */
254 n += 19;
255 break;
256 default:
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
263 n += strlen(p);
264 goto expand;
265 }
266 } else
267 n++;
268 }
269 expand:
270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000273 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000274 if (!string)
275 return NULL;
276
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000277 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000278
279 for (f = format; *f; f++) {
280 if (*f == '%') {
281 const char* p = f++;
282 Py_ssize_t i;
283 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000284#ifdef HAVE_LONG_LONG
285 int longlongflag = 0;
286#endif
Christian Heimes44720832008-05-26 13:01:01 +0000287 int size_tflag = 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
290 n = 0;
291 while (isdigit(Py_CHARMASK(*f)))
292 n = (n*10) + *f++ - '0';
293 if (*f == '.') {
294 f++;
295 n = 0;
296 while (isdigit(Py_CHARMASK(*f)))
297 n = (n*10) + *f++ - '0';
298 }
299 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
300 f++;
Mark Dickinson82864d12009-11-15 16:18:58 +0000301 /* Handle %ld, %lu, %lld and %llu. */
302 if (*f == 'l') {
303 if (f[1] == 'd' || f[1] == 'u') {
304 longflag = 1;
305 ++f;
306 }
307#ifdef HAVE_LONG_LONG
308 else if (f[1] == 'l' &&
309 (f[2] == 'd' || f[2] == 'u')) {
310 longlongflag = 1;
311 f += 2;
312 }
313#endif
Christian Heimes44720832008-05-26 13:01:01 +0000314 }
315 /* handle the size_t flag. */
Mark Dickinson82864d12009-11-15 16:18:58 +0000316 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Christian Heimes44720832008-05-26 13:01:01 +0000317 size_tflag = 1;
318 ++f;
319 }
320
321 switch (*f) {
322 case 'c':
323 *s++ = va_arg(vargs, int);
324 break;
325 case 'd':
326 if (longflag)
327 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#ifdef HAVE_LONG_LONG
329 else if (longlongflag)
330 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
331 va_arg(vargs, PY_LONG_LONG));
332#endif
Christian Heimes44720832008-05-26 13:01:01 +0000333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
335 va_arg(vargs, Py_ssize_t));
336 else
337 sprintf(s, "%d", va_arg(vargs, int));
338 s += strlen(s);
339 break;
340 case 'u':
341 if (longflag)
342 sprintf(s, "%lu",
343 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#ifdef HAVE_LONG_LONG
345 else if (longlongflag)
346 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
347 va_arg(vargs, PY_LONG_LONG));
348#endif
Christian Heimes44720832008-05-26 13:01:01 +0000349 else if (size_tflag)
350 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
351 va_arg(vargs, size_t));
352 else
353 sprintf(s, "%u",
354 va_arg(vargs, unsigned int));
355 s += strlen(s);
356 break;
357 case 'i':
358 sprintf(s, "%i", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 'x':
362 sprintf(s, "%x", va_arg(vargs, int));
363 s += strlen(s);
364 break;
365 case 's':
366 p = va_arg(vargs, char*);
367 i = strlen(p);
368 if (n > 0 && i > n)
369 i = n;
370 Py_MEMCPY(s, p, i);
371 s += i;
372 break;
373 case 'p':
374 sprintf(s, "%p", va_arg(vargs, void*));
375 /* %p is ill-defined: ensure leading 0x. */
376 if (s[1] == 'X')
377 s[1] = 'x';
378 else if (s[1] != 'x') {
379 memmove(s+2, s, strlen(s)+1);
380 s[0] = '0';
381 s[1] = 'x';
382 }
383 s += strlen(s);
384 break;
385 case '%':
386 *s++ = '%';
387 break;
388 default:
389 strcpy(s, p);
390 s += strlen(s);
391 goto end;
392 }
393 } else
394 *s++ = *f;
395 }
396
397 end:
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000398 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Christian Heimes44720832008-05-26 13:01:01 +0000399 return string;
400}
401
402PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000403PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000404{
405 PyObject* ret;
406 va_list vargs;
407
408#ifdef HAVE_STDARG_PROTOTYPES
409 va_start(vargs, format);
410#else
411 va_start(vargs);
412#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000413 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000414 va_end(vargs);
415 return ret;
416}
417
418
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000419PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000420 Py_ssize_t size,
421 const char *encoding,
422 const char *errors)
423{
424 PyObject *v, *str;
425
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000427 if (str == NULL)
428 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000429 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000430 Py_DECREF(str);
431 return v;
432}
433
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000434PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000435 const char *encoding,
436 const char *errors)
437{
438 PyObject *v;
439
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000440 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000441 PyErr_BadArgument();
442 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000443 }
444
Christian Heimes44720832008-05-26 13:01:01 +0000445 if (encoding == NULL) {
446#ifdef Py_USING_UNICODE
447 encoding = PyUnicode_GetDefaultEncoding();
448#else
449 PyErr_SetString(PyExc_ValueError, "no encoding specified");
450 goto onError;
451#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000452 }
Christian Heimes44720832008-05-26 13:01:01 +0000453
454 /* Decode via the codec registry */
455 v = PyCodec_Decode(str, encoding, errors);
456 if (v == NULL)
457 goto onError;
458
459 return v;
460
461 onError:
462 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000463}
464
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000465PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000466 const char *encoding,
467 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000468{
Christian Heimes44720832008-05-26 13:01:01 +0000469 PyObject *v;
470
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000471 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000472 if (v == NULL)
473 goto onError;
474
475#ifdef Py_USING_UNICODE
476 /* Convert Unicode to a string using the default encoding */
477 if (PyUnicode_Check(v)) {
478 PyObject *temp = v;
479 v = PyUnicode_AsEncodedString(v, NULL, NULL);
480 Py_DECREF(temp);
481 if (v == NULL)
482 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000483 }
Christian Heimes44720832008-05-26 13:01:01 +0000484#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000485 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000486 PyErr_Format(PyExc_TypeError,
487 "decoder did not return a string object (type=%.400s)",
488 Py_TYPE(v)->tp_name);
489 Py_DECREF(v);
490 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000491 }
Christian Heimes44720832008-05-26 13:01:01 +0000492
493 return v;
494
495 onError:
496 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000497}
498
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000499PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000500 Py_ssize_t size,
501 const char *encoding,
502 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000503{
Christian Heimes44720832008-05-26 13:01:01 +0000504 PyObject *v, *str;
505
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000507 if (str == NULL)
508 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000509 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000510 Py_DECREF(str);
511 return v;
512}
513
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000514PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000515 const char *encoding,
516 const char *errors)
517{
518 PyObject *v;
519
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000520 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000521 PyErr_BadArgument();
522 goto onError;
523 }
524
525 if (encoding == NULL) {
526#ifdef Py_USING_UNICODE
527 encoding = PyUnicode_GetDefaultEncoding();
528#else
529 PyErr_SetString(PyExc_ValueError, "no encoding specified");
530 goto onError;
531#endif
532 }
533
534 /* Encode via the codec registry */
535 v = PyCodec_Encode(str, encoding, errors);
536 if (v == NULL)
537 goto onError;
538
539 return v;
540
541 onError:
542 return NULL;
543}
544
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000545PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000546 const char *encoding,
547 const char *errors)
548{
549 PyObject *v;
550
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000551 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000552 if (v == NULL)
553 goto onError;
554
555#ifdef Py_USING_UNICODE
556 /* Convert Unicode to a string using the default encoding */
557 if (PyUnicode_Check(v)) {
558 PyObject *temp = v;
559 v = PyUnicode_AsEncodedString(v, NULL, NULL);
560 Py_DECREF(temp);
561 if (v == NULL)
562 goto onError;
563 }
564#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000565 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000566 PyErr_Format(PyExc_TypeError,
567 "encoder did not return a string object (type=%.400s)",
568 Py_TYPE(v)->tp_name);
569 Py_DECREF(v);
570 goto onError;
571 }
572
573 return v;
574
575 onError:
576 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000577}
578
579static void
Christian Heimes44720832008-05-26 13:01:01 +0000580string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000581{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000582 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000583 case SSTATE_NOT_INTERNED:
584 break;
585
586 case SSTATE_INTERNED_MORTAL:
587 /* revive dead object temporarily for DelItem */
588 Py_REFCNT(op) = 3;
589 if (PyDict_DelItem(interned, op) != 0)
590 Py_FatalError(
591 "deletion of interned string failed");
592 break;
593
594 case SSTATE_INTERNED_IMMORTAL:
595 Py_FatalError("Immortal interned string died.");
596
597 default:
598 Py_FatalError("Inconsistent interned string state.");
599 }
600 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000601}
602
Christian Heimes44720832008-05-26 13:01:01 +0000603/* Unescape a backslash-escaped string. If unicode is non-zero,
604 the string is a u-literal. If recode_encoding is non-zero,
605 the string is UTF-8 encoded and should be re-encoded in the
606 specified encoding. */
607
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000608PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000609 Py_ssize_t len,
610 const char *errors,
611 Py_ssize_t unicode,
612 const char *recode_encoding)
613{
614 int c;
615 char *p, *buf;
616 const char *end;
617 PyObject *v;
618 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000619 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000620 if (v == NULL)
621 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000622 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000623 end = s + len;
624 while (s < end) {
625 if (*s != '\\') {
626 non_esc:
627#ifdef Py_USING_UNICODE
628 if (recode_encoding && (*s & 0x80)) {
629 PyObject *u, *w;
630 char *r;
631 const char* t;
632 Py_ssize_t rn;
633 t = s;
634 /* Decode non-ASCII bytes as UTF-8. */
635 while (t < end && (*t & 0x80)) t++;
636 u = PyUnicode_DecodeUTF8(s, t - s, errors);
637 if(!u) goto failed;
638
639 /* Recode them in target encoding. */
640 w = PyUnicode_AsEncodedString(
641 u, recode_encoding, errors);
642 Py_DECREF(u);
643 if (!w) goto failed;
644
645 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000646 assert(PyString_Check(w));
647 r = PyString_AS_STRING(w);
648 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000649 Py_MEMCPY(p, r, rn);
650 p += rn;
651 Py_DECREF(w);
652 s = t;
653 } else {
654 *p++ = *s++;
655 }
656#else
657 *p++ = *s++;
658#endif
659 continue;
660 }
661 s++;
662 if (s==end) {
663 PyErr_SetString(PyExc_ValueError,
664 "Trailing \\ in string");
665 goto failed;
666 }
667 switch (*s++) {
668 /* XXX This assumes ASCII! */
669 case '\n': break;
670 case '\\': *p++ = '\\'; break;
671 case '\'': *p++ = '\''; break;
672 case '\"': *p++ = '\"'; break;
673 case 'b': *p++ = '\b'; break;
674 case 'f': *p++ = '\014'; break; /* FF */
675 case 't': *p++ = '\t'; break;
676 case 'n': *p++ = '\n'; break;
677 case 'r': *p++ = '\r'; break;
678 case 'v': *p++ = '\013'; break; /* VT */
679 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
680 case '0': case '1': case '2': case '3':
681 case '4': case '5': case '6': case '7':
682 c = s[-1] - '0';
683 if (s < end && '0' <= *s && *s <= '7') {
684 c = (c<<3) + *s++ - '0';
685 if (s < end && '0' <= *s && *s <= '7')
686 c = (c<<3) + *s++ - '0';
687 }
688 *p++ = c;
689 break;
690 case 'x':
691 if (s+1 < end &&
692 isxdigit(Py_CHARMASK(s[0])) &&
693 isxdigit(Py_CHARMASK(s[1])))
694 {
695 unsigned int x = 0;
696 c = Py_CHARMASK(*s);
697 s++;
698 if (isdigit(c))
699 x = c - '0';
700 else if (islower(c))
701 x = 10 + c - 'a';
702 else
703 x = 10 + c - 'A';
704 x = x << 4;
705 c = Py_CHARMASK(*s);
706 s++;
707 if (isdigit(c))
708 x += c - '0';
709 else if (islower(c))
710 x += 10 + c - 'a';
711 else
712 x += 10 + c - 'A';
713 *p++ = x;
714 break;
715 }
716 if (!errors || strcmp(errors, "strict") == 0) {
717 PyErr_SetString(PyExc_ValueError,
718 "invalid \\x escape");
719 goto failed;
720 }
721 if (strcmp(errors, "replace") == 0) {
722 *p++ = '?';
723 } else if (strcmp(errors, "ignore") == 0)
724 /* do nothing */;
725 else {
726 PyErr_Format(PyExc_ValueError,
727 "decoding error; "
728 "unknown error handling code: %.400s",
729 errors);
730 goto failed;
731 }
732#ifndef Py_USING_UNICODE
733 case 'u':
734 case 'U':
735 case 'N':
736 if (unicode) {
737 PyErr_SetString(PyExc_ValueError,
738 "Unicode escapes not legal "
739 "when Unicode disabled");
740 goto failed;
741 }
742#endif
743 default:
744 *p++ = '\\';
745 s--;
746 goto non_esc; /* an arbitry number of unescaped
747 UTF-8 bytes may follow. */
748 }
749 }
750 if (p-buf < newlen)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000751 _PyString_Resize(&v, p - buf);
Christian Heimes44720832008-05-26 13:01:01 +0000752 return v;
753 failed:
754 Py_DECREF(v);
755 return NULL;
756}
757
758/* -------------------------------------------------------------------- */
759/* object api */
760
Christian Heimes1a6387e2008-03-26 12:49:49 +0000761static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000762string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000763{
Christian Heimes44720832008-05-26 13:01:01 +0000764 char *s;
765 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000766 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000767 return -1;
768 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000769}
770
Christian Heimes44720832008-05-26 13:01:01 +0000771static /*const*/ char *
772string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000773{
Christian Heimes44720832008-05-26 13:01:01 +0000774 char *s;
775 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000776 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000777 return NULL;
778 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000779}
780
781Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000782PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000784 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000785 return string_getsize(op);
786 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000787}
788
Christian Heimes44720832008-05-26 13:01:01 +0000789/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000790PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000792 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000793 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000794 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000795}
796
797int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000798PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000799 register char **s,
800 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000801{
Christian Heimes44720832008-05-26 13:01:01 +0000802 if (s == NULL) {
803 PyErr_BadInternalCall();
804 return -1;
805 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000806
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000807 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000808#ifdef Py_USING_UNICODE
809 if (PyUnicode_Check(obj)) {
810 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
811 if (obj == NULL)
812 return -1;
813 }
814 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000815#endif
Christian Heimes44720832008-05-26 13:01:01 +0000816 {
817 PyErr_Format(PyExc_TypeError,
818 "expected string or Unicode object, "
819 "%.200s found", Py_TYPE(obj)->tp_name);
820 return -1;
821 }
822 }
823
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000824 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000825 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000826 *len = PyString_GET_SIZE(obj);
827 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000828 PyErr_SetString(PyExc_TypeError,
829 "expected string without null bytes");
830 return -1;
831 }
832 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000833}
834
Christian Heimes1a6387e2008-03-26 12:49:49 +0000835/* -------------------------------------------------------------------- */
836/* Methods */
837
Christian Heimes44720832008-05-26 13:01:01 +0000838#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000839#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000840
Christian Heimes1a6387e2008-03-26 12:49:49 +0000841#include "stringlib/count.h"
842#include "stringlib/find.h"
843#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000844#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000845
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000846#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000847#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000848
Christian Heimes1a6387e2008-03-26 12:49:49 +0000849
850
851static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000852string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000853{
Christian Heimes44720832008-05-26 13:01:01 +0000854 Py_ssize_t i, str_len;
855 char c;
856 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000857
Christian Heimes44720832008-05-26 13:01:01 +0000858 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000859 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000860 int ret;
861 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000862 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000863 if (op == NULL)
864 return -1;
865 ret = string_print(op, fp, flags);
866 Py_DECREF(op);
867 return ret;
868 }
869 if (flags & Py_PRINT_RAW) {
870 char *data = op->ob_sval;
871 Py_ssize_t size = Py_SIZE(op);
872 Py_BEGIN_ALLOW_THREADS
873 while (size > INT_MAX) {
874 /* Very long strings cannot be written atomically.
875 * But don't write exactly INT_MAX bytes at a time
876 * to avoid memory aligment issues.
877 */
878 const int chunk_size = INT_MAX & ~0x3FFF;
879 fwrite(data, 1, chunk_size, fp);
880 data += chunk_size;
881 size -= chunk_size;
882 }
883#ifdef __VMS
884 if (size) fwrite(data, (int)size, 1, fp);
885#else
886 fwrite(data, 1, (int)size, fp);
887#endif
888 Py_END_ALLOW_THREADS
889 return 0;
890 }
891
892 /* figure out which quote to use; single is preferred */
893 quote = '\'';
894 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
895 !memchr(op->ob_sval, '"', Py_SIZE(op)))
896 quote = '"';
897
898 str_len = Py_SIZE(op);
899 Py_BEGIN_ALLOW_THREADS
900 fputc(quote, fp);
901 for (i = 0; i < str_len; i++) {
902 /* Since strings are immutable and the caller should have a
903 reference, accessing the interal buffer should not be an issue
904 with the GIL released. */
905 c = op->ob_sval[i];
906 if (c == quote || c == '\\')
907 fprintf(fp, "\\%c", c);
908 else if (c == '\t')
909 fprintf(fp, "\\t");
910 else if (c == '\n')
911 fprintf(fp, "\\n");
912 else if (c == '\r')
913 fprintf(fp, "\\r");
914 else if (c < ' ' || c >= 0x7f)
915 fprintf(fp, "\\x%02x", c & 0xff);
916 else
917 fputc(c, fp);
918 }
919 fputc(quote, fp);
920 Py_END_ALLOW_THREADS
921 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000922}
923
Christian Heimes44720832008-05-26 13:01:01 +0000924PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000925PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000926{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000927 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000928 size_t newsize = 2 + 4 * Py_SIZE(op);
929 PyObject *v;
930 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
931 PyErr_SetString(PyExc_OverflowError,
932 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000933 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000934 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000935 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000936 if (v == NULL) {
937 return NULL;
938 }
939 else {
940 register Py_ssize_t i;
941 register char c;
942 register char *p;
943 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000944
Christian Heimes44720832008-05-26 13:01:01 +0000945 /* figure out which quote to use; single is preferred */
946 quote = '\'';
947 if (smartquotes &&
948 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
949 !memchr(op->ob_sval, '"', Py_SIZE(op)))
950 quote = '"';
951
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000952 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000953 *p++ = quote;
954 for (i = 0; i < Py_SIZE(op); i++) {
955 /* There's at least enough room for a hex escape
956 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000957 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000958 c = op->ob_sval[i];
959 if (c == quote || c == '\\')
960 *p++ = '\\', *p++ = c;
961 else if (c == '\t')
962 *p++ = '\\', *p++ = 't';
963 else if (c == '\n')
964 *p++ = '\\', *p++ = 'n';
965 else if (c == '\r')
966 *p++ = '\\', *p++ = 'r';
967 else if (c < ' ' || c >= 0x7f) {
968 /* For performance, we don't want to call
969 PyOS_snprintf here (extra layers of
970 function call). */
971 sprintf(p, "\\x%02x", c & 0xff);
972 p += 4;
973 }
974 else
975 *p++ = c;
976 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000977 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000978 *p++ = quote;
979 *p = '\0';
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000980 _PyString_Resize(
981 &v, (p - PyString_AS_STRING(v)));
Christian Heimes44720832008-05-26 13:01:01 +0000982 return v;
983 }
984}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000985
986static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000987string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000988{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000989 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000990}
991
Christian Heimes1a6387e2008-03-26 12:49:49 +0000992static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000993string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000994{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000995 assert(PyString_Check(s));
996 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000997 Py_INCREF(s);
998 return s;
999 }
1000 else {
1001 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001002 PyStringObject *t = (PyStringObject *) s;
1003 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +00001004 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001005}
1006
Christian Heimes44720832008-05-26 13:01:01 +00001007static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001008string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001009{
1010 return Py_SIZE(a);
1011}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001012
Christian Heimes44720832008-05-26 13:01:01 +00001013static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001014string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001015{
1016 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001017 register PyStringObject *op;
1018 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001019#ifdef Py_USING_UNICODE
1020 if (PyUnicode_Check(bb))
1021 return PyUnicode_Concat((PyObject *)a, bb);
1022#endif
1023 if (PyByteArray_Check(bb))
1024 return PyByteArray_Concat((PyObject *)a, bb);
1025 PyErr_Format(PyExc_TypeError,
1026 "cannot concatenate 'str' and '%.200s' objects",
1027 Py_TYPE(bb)->tp_name);
1028 return NULL;
1029 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001030#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +00001031 /* Optimize cases with empty left or right operand */
1032 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001033 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001034 if (Py_SIZE(a) == 0) {
1035 Py_INCREF(bb);
1036 return bb;
1037 }
1038 Py_INCREF(a);
1039 return (PyObject *)a;
1040 }
1041 size = Py_SIZE(a) + Py_SIZE(b);
Neal Norwitze7d8be82008-07-31 17:17:14 +00001042 /* Check that string sizes are not negative, to prevent an
1043 overflow in cases where we are passed incorrectly-created
1044 strings with negative lengths (due to a bug in other code).
1045 */
1046 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1047 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001048 PyErr_SetString(PyExc_OverflowError,
1049 "strings are too large to concat");
1050 return NULL;
1051 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001052
Christian Heimes44720832008-05-26 13:01:01 +00001053 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001054 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +00001055 PyErr_SetString(PyExc_OverflowError,
1056 "strings are too large to concat");
1057 return NULL;
1058 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001059 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +00001060 if (op == NULL)
1061 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001062 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001063 op->ob_shash = -1;
1064 op->ob_sstate = SSTATE_NOT_INTERNED;
1065 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1066 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1067 op->ob_sval[size] = '\0';
1068 return (PyObject *) op;
1069#undef b
1070}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001071
Christian Heimes44720832008-05-26 13:01:01 +00001072static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001073string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001074{
1075 register Py_ssize_t i;
1076 register Py_ssize_t j;
1077 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001078 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001079 size_t nbytes;
1080 if (n < 0)
1081 n = 0;
1082 /* watch out for overflows: the size can overflow int,
1083 * and the # of bytes needed can overflow size_t
1084 */
1085 size = Py_SIZE(a) * n;
1086 if (n && size / n != Py_SIZE(a)) {
1087 PyErr_SetString(PyExc_OverflowError,
1088 "repeated string is too long");
1089 return NULL;
1090 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001091 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001092 Py_INCREF(a);
1093 return (PyObject *)a;
1094 }
1095 nbytes = (size_t)size;
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001096 if (nbytes + PyStringObject_SIZE <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001097 PyErr_SetString(PyExc_OverflowError,
1098 "repeated string is too long");
1099 return NULL;
1100 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001101 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001102 if (op == NULL)
1103 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001104 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001105 op->ob_shash = -1;
1106 op->ob_sstate = SSTATE_NOT_INTERNED;
1107 op->ob_sval[size] = '\0';
1108 if (Py_SIZE(a) == 1 && n > 0) {
1109 memset(op->ob_sval, a->ob_sval[0] , n);
1110 return (PyObject *) op;
1111 }
1112 i = 0;
1113 if (i < size) {
1114 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1115 i = Py_SIZE(a);
1116 }
1117 while (i < size) {
1118 j = (i <= size-i) ? i : size-i;
1119 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1120 i += j;
1121 }
1122 return (PyObject *) op;
1123}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001124
Christian Heimes44720832008-05-26 13:01:01 +00001125/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1126
1127static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001128string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001129 register Py_ssize_t j)
1130 /* j -- may be negative! */
1131{
1132 if (i < 0)
1133 i = 0;
1134 if (j < 0)
1135 j = 0; /* Avoid signed/unsigned bug in next line */
1136 if (j > Py_SIZE(a))
1137 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001138 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001139 /* It's the same as a */
1140 Py_INCREF(a);
1141 return (PyObject *)a;
1142 }
1143 if (j < i)
1144 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001145 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001146}
1147
1148static int
1149string_contains(PyObject *str_obj, PyObject *sub_obj)
1150{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001151 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001152#ifdef Py_USING_UNICODE
1153 if (PyUnicode_Check(sub_obj))
1154 return PyUnicode_Contains(str_obj, sub_obj);
1155#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001156 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001157 PyErr_Format(PyExc_TypeError,
1158 "'in <string>' requires string as left operand, "
1159 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1160 return -1;
1161 }
1162 }
1163
1164 return stringlib_contains_obj(str_obj, sub_obj);
1165}
1166
1167static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001168string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001169{
1170 char pchar;
1171 PyObject *v;
1172 if (i < 0 || i >= Py_SIZE(a)) {
1173 PyErr_SetString(PyExc_IndexError, "string index out of range");
1174 return NULL;
1175 }
1176 pchar = a->ob_sval[i];
1177 v = (PyObject *)characters[pchar & UCHAR_MAX];
1178 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001179 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001180 else {
1181#ifdef COUNT_ALLOCS
1182 one_strings++;
1183#endif
1184 Py_INCREF(v);
1185 }
1186 return v;
1187}
1188
1189static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001190string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001191{
1192 int c;
1193 Py_ssize_t len_a, len_b;
1194 Py_ssize_t min_len;
1195 PyObject *result;
1196
1197 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001198 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001199 result = Py_NotImplemented;
1200 goto out;
1201 }
1202 if (a == b) {
1203 switch (op) {
1204 case Py_EQ:case Py_LE:case Py_GE:
1205 result = Py_True;
1206 goto out;
1207 case Py_NE:case Py_LT:case Py_GT:
1208 result = Py_False;
1209 goto out;
1210 }
1211 }
1212 if (op == Py_EQ) {
1213 /* Supporting Py_NE here as well does not save
1214 much time, since Py_NE is rarely used. */
1215 if (Py_SIZE(a) == Py_SIZE(b)
1216 && (a->ob_sval[0] == b->ob_sval[0]
1217 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1218 result = Py_True;
1219 } else {
1220 result = Py_False;
1221 }
1222 goto out;
1223 }
1224 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1225 min_len = (len_a < len_b) ? len_a : len_b;
1226 if (min_len > 0) {
1227 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1228 if (c==0)
1229 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1230 } else
1231 c = 0;
1232 if (c == 0)
1233 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1234 switch (op) {
1235 case Py_LT: c = c < 0; break;
1236 case Py_LE: c = c <= 0; break;
1237 case Py_EQ: assert(0); break; /* unreachable */
1238 case Py_NE: c = c != 0; break;
1239 case Py_GT: c = c > 0; break;
1240 case Py_GE: c = c >= 0; break;
1241 default:
1242 result = Py_NotImplemented;
1243 goto out;
1244 }
1245 result = c ? Py_True : Py_False;
1246 out:
1247 Py_INCREF(result);
1248 return result;
1249}
1250
1251int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001252_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001253{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001254 PyStringObject *a = (PyStringObject*) o1;
1255 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001256 return Py_SIZE(a) == Py_SIZE(b)
1257 && *a->ob_sval == *b->ob_sval
1258 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1259}
1260
1261static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001262string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001263{
1264 register Py_ssize_t len;
1265 register unsigned char *p;
1266 register long x;
1267
1268 if (a->ob_shash != -1)
1269 return a->ob_shash;
1270 len = Py_SIZE(a);
1271 p = (unsigned char *) a->ob_sval;
1272 x = *p << 7;
1273 while (--len >= 0)
1274 x = (1000003*x) ^ *p++;
1275 x ^= Py_SIZE(a);
1276 if (x == -1)
1277 x = -2;
1278 a->ob_shash = x;
1279 return x;
1280}
1281
1282static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001283string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001284{
1285 if (PyIndex_Check(item)) {
1286 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1287 if (i == -1 && PyErr_Occurred())
1288 return NULL;
1289 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001290 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001291 return string_item(self, i);
1292 }
1293 else if (PySlice_Check(item)) {
1294 Py_ssize_t start, stop, step, slicelength, cur, i;
1295 char* source_buf;
1296 char* result_buf;
1297 PyObject* result;
1298
1299 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001300 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001301 &start, &stop, &step, &slicelength) < 0) {
1302 return NULL;
1303 }
1304
1305 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001306 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001307 }
1308 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001309 slicelength == PyString_GET_SIZE(self) &&
1310 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001311 Py_INCREF(self);
1312 return (PyObject *)self;
1313 }
1314 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001315 return PyString_FromStringAndSize(
1316 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001317 slicelength);
1318 }
1319 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001320 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001321 result_buf = (char *)PyMem_Malloc(slicelength);
1322 if (result_buf == NULL)
1323 return PyErr_NoMemory();
1324
1325 for (cur = start, i = 0; i < slicelength;
1326 cur += step, i++) {
1327 result_buf[i] = source_buf[cur];
1328 }
1329
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001330 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001331 slicelength);
1332 PyMem_Free(result_buf);
1333 return result;
1334 }
1335 }
1336 else {
1337 PyErr_Format(PyExc_TypeError,
1338 "string indices must be integers, not %.200s",
1339 Py_TYPE(item)->tp_name);
1340 return NULL;
1341 }
1342}
1343
1344static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001345string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001346{
1347 if ( index != 0 ) {
1348 PyErr_SetString(PyExc_SystemError,
1349 "accessing non-existent string segment");
1350 return -1;
1351 }
1352 *ptr = (void *)self->ob_sval;
1353 return Py_SIZE(self);
1354}
1355
1356static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001357string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001358{
1359 PyErr_SetString(PyExc_TypeError,
1360 "Cannot use string as modifiable buffer");
1361 return -1;
1362}
1363
1364static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001365string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001366{
1367 if ( lenp )
1368 *lenp = Py_SIZE(self);
1369 return 1;
1370}
1371
1372static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001373string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001374{
1375 if ( index != 0 ) {
1376 PyErr_SetString(PyExc_SystemError,
1377 "accessing non-existent string segment");
1378 return -1;
1379 }
1380 *ptr = self->ob_sval;
1381 return Py_SIZE(self);
1382}
1383
1384static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001385string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001386{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001387 return PyBuffer_FillInfo(view, (PyObject*)self,
1388 (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou92a62402008-08-02 21:58:05 +00001389 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001390}
1391
1392static PySequenceMethods string_as_sequence = {
1393 (lenfunc)string_length, /*sq_length*/
1394 (binaryfunc)string_concat, /*sq_concat*/
1395 (ssizeargfunc)string_repeat, /*sq_repeat*/
1396 (ssizeargfunc)string_item, /*sq_item*/
1397 (ssizessizeargfunc)string_slice, /*sq_slice*/
1398 0, /*sq_ass_item*/
1399 0, /*sq_ass_slice*/
1400 (objobjproc)string_contains /*sq_contains*/
1401};
1402
1403static PyMappingMethods string_as_mapping = {
1404 (lenfunc)string_length,
1405 (binaryfunc)string_subscript,
1406 0,
1407};
1408
1409static PyBufferProcs string_as_buffer = {
1410 (readbufferproc)string_buffer_getreadbuf,
1411 (writebufferproc)string_buffer_getwritebuf,
1412 (segcountproc)string_buffer_getsegcount,
1413 (charbufferproc)string_buffer_getcharbuf,
1414 (getbufferproc)string_buffer_getbuffer,
1415 0, /* XXX */
1416};
1417
1418
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001419
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
1428
Christian Heimes1a6387e2008-03-26 12:49:49 +00001429PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001430"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001431\n\
Christian Heimes44720832008-05-26 13:01:01 +00001432Return a list of the words in the string S, using sep as the\n\
1433delimiter string. If maxsplit is given, at most maxsplit\n\
1434splits are done. If sep is not specified or is None, any\n\
1435whitespace string is a separator and empty strings are removed\n\
1436from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001437
1438static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001439string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001440{
Antoine Pitrou64672132010-01-13 07:55:48 +00001441 Py_ssize_t len = PyString_GET_SIZE(self), n;
1442 Py_ssize_t maxsplit = -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001443 const char *s = PyString_AS_STRING(self), *sub;
Antoine Pitrou64672132010-01-13 07:55:48 +00001444 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001445
Christian Heimes44720832008-05-26 13:01:01 +00001446 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1447 return NULL;
1448 if (maxsplit < 0)
1449 maxsplit = PY_SSIZE_T_MAX;
1450 if (subobj == Py_None)
Antoine Pitrou64672132010-01-13 07:55:48 +00001451 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001452 if (PyString_Check(subobj)) {
1453 sub = PyString_AS_STRING(subobj);
1454 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001455 }
1456#ifdef Py_USING_UNICODE
1457 else if (PyUnicode_Check(subobj))
1458 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1459#endif
1460 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1461 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001462
Antoine Pitrou64672132010-01-13 07:55:48 +00001463 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001464}
1465
1466PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001467"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001468\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001469Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001470the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001471found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001472
1473static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001474string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001475{
Christian Heimes44720832008-05-26 13:01:01 +00001476 const char *sep;
1477 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001478
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001479 if (PyString_Check(sep_obj)) {
1480 sep = PyString_AS_STRING(sep_obj);
1481 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001482 }
1483#ifdef Py_USING_UNICODE
1484 else if (PyUnicode_Check(sep_obj))
1485 return PyUnicode_Partition((PyObject *) self, sep_obj);
1486#endif
1487 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1488 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001489
Christian Heimes44720832008-05-26 13:01:01 +00001490 return stringlib_partition(
1491 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001492 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001493 sep_obj, sep, sep_len
1494 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001495}
1496
1497PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001498"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001499\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001500Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001501the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001502separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001503
1504static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001505string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001506{
Christian Heimes44720832008-05-26 13:01:01 +00001507 const char *sep;
1508 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001509
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001510 if (PyString_Check(sep_obj)) {
1511 sep = PyString_AS_STRING(sep_obj);
1512 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001513 }
1514#ifdef Py_USING_UNICODE
1515 else if (PyUnicode_Check(sep_obj))
Amaury Forgeot d'Arc3571fbf2008-09-01 19:52:00 +00001516 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001517#endif
1518 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1519 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001520
Christian Heimes44720832008-05-26 13:01:01 +00001521 return stringlib_rpartition(
1522 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001523 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001524 sep_obj, sep, sep_len
1525 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001526}
1527
Christian Heimes1a6387e2008-03-26 12:49:49 +00001528PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001529"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001530\n\
Christian Heimes44720832008-05-26 13:01:01 +00001531Return a list of the words in the string S, using sep as the\n\
1532delimiter string, starting at the end of the string and working\n\
1533to the front. If maxsplit is given, at most maxsplit splits are\n\
1534done. If sep is not specified or is None, any whitespace string\n\
1535is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001536
1537static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001538string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001539{
Antoine Pitrou64672132010-01-13 07:55:48 +00001540 Py_ssize_t len = PyString_GET_SIZE(self), n;
1541 Py_ssize_t maxsplit = -1;
Antoine Pitrou5b7139a2010-01-02 21:12:58 +00001542 const char *s = PyString_AS_STRING(self), *sub;
Antoine Pitrou64672132010-01-13 07:55:48 +00001543 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001544
Christian Heimes44720832008-05-26 13:01:01 +00001545 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1546 return NULL;
1547 if (maxsplit < 0)
1548 maxsplit = PY_SSIZE_T_MAX;
1549 if (subobj == Py_None)
Antoine Pitrou64672132010-01-13 07:55:48 +00001550 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001551 if (PyString_Check(subobj)) {
1552 sub = PyString_AS_STRING(subobj);
1553 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001554 }
1555#ifdef Py_USING_UNICODE
1556 else if (PyUnicode_Check(subobj))
1557 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1558#endif
1559 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1560 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001561
Antoine Pitrou64672132010-01-13 07:55:48 +00001562 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001563}
1564
1565
1566PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001567"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001568\n\
1569Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001570iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001571
1572static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001573string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001574{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001575 char *sep = PyString_AS_STRING(self);
1576 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001577 PyObject *res = NULL;
1578 char *p;
1579 Py_ssize_t seqlen = 0;
1580 size_t sz = 0;
1581 Py_ssize_t i;
1582 PyObject *seq, *item;
1583
1584 seq = PySequence_Fast(orig, "");
1585 if (seq == NULL) {
1586 return NULL;
1587 }
1588
1589 seqlen = PySequence_Size(seq);
1590 if (seqlen == 0) {
1591 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001592 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001593 }
1594 if (seqlen == 1) {
1595 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001596 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001597 Py_INCREF(item);
1598 Py_DECREF(seq);
1599 return item;
1600 }
1601 }
1602
1603 /* There are at least two things to join, or else we have a subclass
1604 * of the builtin types in the sequence.
1605 * Do a pre-pass to figure out the total amount of space we'll
1606 * need (sz), see whether any argument is absurd, and defer to
1607 * the Unicode join if appropriate.
1608 */
1609 for (i = 0; i < seqlen; i++) {
1610 const size_t old_sz = sz;
1611 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001612 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001613#ifdef Py_USING_UNICODE
1614 if (PyUnicode_Check(item)) {
1615 /* Defer to Unicode join.
1616 * CAUTION: There's no gurantee that the
1617 * original sequence can be iterated over
1618 * again, so we must pass seq here.
1619 */
1620 PyObject *result;
1621 result = PyUnicode_Join((PyObject *)self, seq);
1622 Py_DECREF(seq);
1623 return result;
1624 }
1625#endif
1626 PyErr_Format(PyExc_TypeError,
1627 "sequence item %zd: expected string,"
1628 " %.80s found",
1629 i, Py_TYPE(item)->tp_name);
1630 Py_DECREF(seq);
1631 return NULL;
1632 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001633 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001634 if (i != 0)
1635 sz += seplen;
1636 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1637 PyErr_SetString(PyExc_OverflowError,
1638 "join() result is too long for a Python string");
1639 Py_DECREF(seq);
1640 return NULL;
1641 }
1642 }
1643
1644 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001645 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001646 if (res == NULL) {
1647 Py_DECREF(seq);
1648 return NULL;
1649 }
1650
1651 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001652 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001653 for (i = 0; i < seqlen; ++i) {
1654 size_t n;
1655 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001656 n = PyString_GET_SIZE(item);
1657 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001658 p += n;
1659 if (i < seqlen - 1) {
1660 Py_MEMCPY(p, sep, seplen);
1661 p += seplen;
1662 }
1663 }
1664
1665 Py_DECREF(seq);
1666 return res;
1667}
1668
1669PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001670_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001671{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001672 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001673 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001674 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001675}
1676
/* Helper macro to fix up start/end slice values in place.

   `start` and `end` must be modifiable lvalues; `len` is the length of
   the sequence being sliced.  An `end` beyond `len` is clamped to `len`;
   a negative `start` or `end` is taken relative to `len` and clamped to 0
   if it is still negative.  Note that `start` is not clamped to `len`,
   so callers must cope with start > end.

   The body is wrapped in do { ... } while (0) so the macro expands to a
   single statement and is safe inside an unbraced if/else; every use
   must therefore be terminated with a semicolon.  Arguments are
   evaluated more than once, so they must be free of side effects. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001691
1692Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001693string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001694{
1695 PyObject *subobj;
1696 const char *sub;
1697 Py_ssize_t sub_len;
1698 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1699 PyObject *obj_start=Py_None, *obj_end=Py_None;
1700
1701 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1702 &obj_start, &obj_end))
1703 return -2;
1704 /* To support None in "start" and "end" arguments, meaning
1705 the same as if they were not passed.
1706 */
1707 if (obj_start != Py_None)
1708 if (!_PyEval_SliceIndex(obj_start, &start))
1709 return -2;
1710 if (obj_end != Py_None)
1711 if (!_PyEval_SliceIndex(obj_end, &end))
1712 return -2;
1713
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001714 if (PyString_Check(subobj)) {
1715 sub = PyString_AS_STRING(subobj);
1716 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001717 }
1718#ifdef Py_USING_UNICODE
1719 else if (PyUnicode_Check(subobj))
1720 return PyUnicode_Find(
1721 (PyObject *)self, subobj, start, end, dir);
1722#endif
1723 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1724 /* XXX - the "expected a character buffer object" is pretty
1725 confusing for a non-expert. remap to something else ? */
1726 return -2;
1727
1728 if (dir > 0)
1729 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001730 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001731 sub, sub_len, start, end);
1732 else
1733 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001734 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001735 sub, sub_len, start, end);
1736}
1737
1738
1739PyDoc_STRVAR(find__doc__,
1740"S.find(sub [,start [,end]]) -> int\n\
1741\n\
1742Return the lowest index in S where substring sub is found,\n\
1743such that sub is contained within s[start:end]. Optional\n\
1744arguments start and end are interpreted as in slice notation.\n\
1745\n\
1746Return -1 on failure.");
1747
1748static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001749string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001750{
1751 Py_ssize_t result = string_find_internal(self, args, +1);
1752 if (result == -2)
1753 return NULL;
1754 return PyInt_FromSsize_t(result);
1755}
1756
1757
1758PyDoc_STRVAR(index__doc__,
1759"S.index(sub [,start [,end]]) -> int\n\
1760\n\
1761Like S.find() but raise ValueError when the substring is not found.");
1762
1763static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001764string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001765{
1766 Py_ssize_t result = string_find_internal(self, args, +1);
1767 if (result == -2)
1768 return NULL;
1769 if (result == -1) {
1770 PyErr_SetString(PyExc_ValueError,
1771 "substring not found");
1772 return NULL;
1773 }
1774 return PyInt_FromSsize_t(result);
1775}
1776
1777
1778PyDoc_STRVAR(rfind__doc__,
1779"S.rfind(sub [,start [,end]]) -> int\n\
1780\n\
1781Return the highest index in S where substring sub is found,\n\
1782such that sub is contained within s[start:end]. Optional\n\
1783arguments start and end are interpreted as in slice notation.\n\
1784\n\
1785Return -1 on failure.");
1786
1787static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001788string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001789{
1790 Py_ssize_t result = string_find_internal(self, args, -1);
1791 if (result == -2)
1792 return NULL;
1793 return PyInt_FromSsize_t(result);
1794}
1795
1796
1797PyDoc_STRVAR(rindex__doc__,
1798"S.rindex(sub [,start [,end]]) -> int\n\
1799\n\
1800Like S.rfind() but raise ValueError when the substring is not found.");
1801
1802static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001803string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001804{
1805 Py_ssize_t result = string_find_internal(self, args, -1);
1806 if (result == -2)
1807 return NULL;
1808 if (result == -1) {
1809 PyErr_SetString(PyExc_ValueError,
1810 "substring not found");
1811 return NULL;
1812 }
1813 return PyInt_FromSsize_t(result);
1814}
1815
1816
1817Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001818do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001819{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001820 char *s = PyString_AS_STRING(self);
1821 Py_ssize_t len = PyString_GET_SIZE(self);
1822 char *sep = PyString_AS_STRING(sepobj);
1823 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00001824 Py_ssize_t i, j;
1825
1826 i = 0;
1827 if (striptype != RIGHTSTRIP) {
1828 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1829 i++;
1830 }
1831 }
1832
1833 j = len;
1834 if (striptype != LEFTSTRIP) {
1835 do {
1836 j--;
1837 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1838 j++;
1839 }
1840
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001841 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001842 Py_INCREF(self);
1843 return (PyObject*)self;
1844 }
1845 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001846 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001847}
1848
1849
1850Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001851do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001852{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001853 char *s = PyString_AS_STRING(self);
1854 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001855
1856 i = 0;
1857 if (striptype != RIGHTSTRIP) {
1858 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1859 i++;
1860 }
1861 }
1862
1863 j = len;
1864 if (striptype != LEFTSTRIP) {
1865 do {
1866 j--;
1867 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1868 j++;
1869 }
1870
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001871 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001872 Py_INCREF(self);
1873 return (PyObject*)self;
1874 }
1875 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001876 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001877}
1878
1879
1880Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001881do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001882{
1883 PyObject *sep = NULL;
1884
1885 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1886 return NULL;
1887
1888 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001889 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00001890 return do_xstrip(self, striptype, sep);
1891#ifdef Py_USING_UNICODE
1892 else if (PyUnicode_Check(sep)) {
1893 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1894 PyObject *res;
1895 if (uniself==NULL)
1896 return NULL;
1897 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1898 striptype, sep);
1899 Py_DECREF(uniself);
1900 return res;
1901 }
1902#endif
1903 PyErr_Format(PyExc_TypeError,
1904#ifdef Py_USING_UNICODE
1905 "%s arg must be None, str or unicode",
1906#else
1907 "%s arg must be None or str",
1908#endif
1909 STRIPNAME(striptype));
1910 return NULL;
1911 }
1912
1913 return do_strip(self, striptype);
1914}
1915
1916
1917PyDoc_STRVAR(strip__doc__,
1918"S.strip([chars]) -> string or unicode\n\
1919\n\
1920Return a copy of the string S with leading and trailing\n\
1921whitespace removed.\n\
1922If chars is given and not None, remove characters in chars instead.\n\
1923If chars is unicode, S will be converted to unicode before stripping");
1924
1925static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001926string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001927{
1928 if (PyTuple_GET_SIZE(args) == 0)
1929 return do_strip(self, BOTHSTRIP); /* Common case */
1930 else
1931 return do_argstrip(self, BOTHSTRIP, args);
1932}
1933
1934
1935PyDoc_STRVAR(lstrip__doc__,
1936"S.lstrip([chars]) -> string or unicode\n\
1937\n\
1938Return a copy of the string S with leading whitespace removed.\n\
1939If chars is given and not None, remove characters in chars instead.\n\
1940If chars is unicode, S will be converted to unicode before stripping");
1941
1942static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001943string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001944{
1945 if (PyTuple_GET_SIZE(args) == 0)
1946 return do_strip(self, LEFTSTRIP); /* Common case */
1947 else
1948 return do_argstrip(self, LEFTSTRIP, args);
1949}
1950
1951
1952PyDoc_STRVAR(rstrip__doc__,
1953"S.rstrip([chars]) -> string or unicode\n\
1954\n\
1955Return a copy of the string S with trailing whitespace removed.\n\
1956If chars is given and not None, remove characters in chars instead.\n\
1957If chars is unicode, S will be converted to unicode before stripping");
1958
1959static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001960string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001961{
1962 if (PyTuple_GET_SIZE(args) == 0)
1963 return do_strip(self, RIGHTSTRIP); /* Common case */
1964 else
1965 return do_argstrip(self, RIGHTSTRIP, args);
1966}
1967
1968
1969PyDoc_STRVAR(lower__doc__,
1970"S.lower() -> string\n\
1971\n\
1972Return a copy of the string S converted to lowercase.");
1973
1974/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1975#ifndef _tolower
1976#define _tolower tolower
1977#endif
1978
1979static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001980string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001981{
1982 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001983 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001984 PyObject *newobj;
1985
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001986 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00001987 if (!newobj)
1988 return NULL;
1989
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001990 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001991
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001992 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001993
1994 for (i = 0; i < n; i++) {
1995 int c = Py_CHARMASK(s[i]);
1996 if (isupper(c))
1997 s[i] = _tolower(c);
1998 }
1999
2000 return newobj;
2001}
2002
2003PyDoc_STRVAR(upper__doc__,
2004"S.upper() -> string\n\
2005\n\
2006Return a copy of the string S converted to uppercase.");
2007
2008#ifndef _toupper
2009#define _toupper toupper
2010#endif
2011
2012static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002013string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002014{
2015 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002016 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002017 PyObject *newobj;
2018
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002019 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002020 if (!newobj)
2021 return NULL;
2022
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002023 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002024
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002025 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002026
2027 for (i = 0; i < n; i++) {
2028 int c = Py_CHARMASK(s[i]);
2029 if (islower(c))
2030 s[i] = _toupper(c);
2031 }
2032
2033 return newobj;
2034}
2035
2036PyDoc_STRVAR(title__doc__,
2037"S.title() -> string\n\
2038\n\
2039Return a titlecased version of S, i.e. words start with uppercase\n\
2040characters, all remaining cased characters have lowercase.");
2041
2042static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002043string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002044{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002045 char *s = PyString_AS_STRING(self), *s_new;
2046 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002047 int previous_is_cased = 0;
2048 PyObject *newobj;
2049
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002050 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002051 if (newobj == NULL)
2052 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002053 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002054 for (i = 0; i < n; i++) {
2055 int c = Py_CHARMASK(*s++);
2056 if (islower(c)) {
2057 if (!previous_is_cased)
2058 c = toupper(c);
2059 previous_is_cased = 1;
2060 } else if (isupper(c)) {
2061 if (previous_is_cased)
2062 c = tolower(c);
2063 previous_is_cased = 1;
2064 } else
2065 previous_is_cased = 0;
2066 *s_new++ = c;
2067 }
2068 return newobj;
2069}
2070
2071PyDoc_STRVAR(capitalize__doc__,
2072"S.capitalize() -> string\n\
2073\n\
2074Return a copy of the string S with only its first character\n\
2075capitalized.");
2076
2077static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002078string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002079{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002080 char *s = PyString_AS_STRING(self), *s_new;
2081 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002082 PyObject *newobj;
2083
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002084 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002085 if (newobj == NULL)
2086 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002087 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002088 if (0 < n) {
2089 int c = Py_CHARMASK(*s++);
2090 if (islower(c))
2091 *s_new = toupper(c);
2092 else
2093 *s_new = c;
2094 s_new++;
2095 }
2096 for (i = 1; i < n; i++) {
2097 int c = Py_CHARMASK(*s++);
2098 if (isupper(c))
2099 *s_new = tolower(c);
2100 else
2101 *s_new = c;
2102 s_new++;
2103 }
2104 return newobj;
2105}
2106
2107
2108PyDoc_STRVAR(count__doc__,
2109"S.count(sub[, start[, end]]) -> int\n\
2110\n\
2111Return the number of non-overlapping occurrences of substring sub in\n\
2112string S[start:end]. Optional arguments start and end are interpreted\n\
2113as in slice notation.");
2114
2115static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002116string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002117{
2118 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002119 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002120 Py_ssize_t sub_len;
2121 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2122
2123 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2124 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2125 return NULL;
2126
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002127 if (PyString_Check(sub_obj)) {
2128 sub = PyString_AS_STRING(sub_obj);
2129 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002130 }
2131#ifdef Py_USING_UNICODE
2132 else if (PyUnicode_Check(sub_obj)) {
2133 Py_ssize_t count;
2134 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2135 if (count == -1)
2136 return NULL;
2137 else
2138 return PyInt_FromSsize_t(count);
2139 }
2140#endif
2141 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2142 return NULL;
2143
Antoine Pitrou64672132010-01-13 07:55:48 +00002144 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002145
2146 return PyInt_FromSsize_t(
Antoine Pitrou64672132010-01-13 07:55:48 +00002147 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
Christian Heimes44720832008-05-26 13:01:01 +00002148 );
2149}
2150
2151PyDoc_STRVAR(swapcase__doc__,
2152"S.swapcase() -> string\n\
2153\n\
2154Return a copy of the string S with uppercase characters\n\
2155converted to lowercase and vice versa.");
2156
2157static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002158string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002159{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002160 char *s = PyString_AS_STRING(self), *s_new;
2161 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002162 PyObject *newobj;
2163
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002164 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002165 if (newobj == NULL)
2166 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002167 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002168 for (i = 0; i < n; i++) {
2169 int c = Py_CHARMASK(*s++);
2170 if (islower(c)) {
2171 *s_new = toupper(c);
2172 }
2173 else if (isupper(c)) {
2174 *s_new = tolower(c);
2175 }
2176 else
2177 *s_new = c;
2178 s_new++;
2179 }
2180 return newobj;
2181}
2182
2183
2184PyDoc_STRVAR(translate__doc__,
2185"S.translate(table [,deletechars]) -> string\n\
2186\n\
2187Return a copy of the string S, where all characters occurring\n\
2188in the optional argument deletechars are removed, and the\n\
2189remaining characters have been mapped through the given\n\
2190translation table, which must be a string of length 256.");
2191
2192static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002193string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002194{
2195 register char *input, *output;
2196 const char *table;
2197 register Py_ssize_t i, c, changed = 0;
2198 PyObject *input_obj = (PyObject*)self;
2199 const char *output_start, *del_table=NULL;
2200 Py_ssize_t inlen, tablen, dellen = 0;
2201 PyObject *result;
2202 int trans_table[256];
2203 PyObject *tableobj, *delobj = NULL;
2204
2205 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2206 &tableobj, &delobj))
2207 return NULL;
2208
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002209 if (PyString_Check(tableobj)) {
2210 table = PyString_AS_STRING(tableobj);
2211 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002212 }
2213 else if (tableobj == Py_None) {
2214 table = NULL;
2215 tablen = 256;
2216 }
2217#ifdef Py_USING_UNICODE
2218 else if (PyUnicode_Check(tableobj)) {
2219 /* Unicode .translate() does not support the deletechars
2220 parameter; instead a mapping to None will cause characters
2221 to be deleted. */
2222 if (delobj != NULL) {
2223 PyErr_SetString(PyExc_TypeError,
2224 "deletions are implemented differently for unicode");
2225 return NULL;
2226 }
2227 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2228 }
2229#endif
2230 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2231 return NULL;
2232
2233 if (tablen != 256) {
2234 PyErr_SetString(PyExc_ValueError,
2235 "translation table must be 256 characters long");
2236 return NULL;
2237 }
2238
2239 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002240 if (PyString_Check(delobj)) {
2241 del_table = PyString_AS_STRING(delobj);
2242 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002243 }
2244#ifdef Py_USING_UNICODE
2245 else if (PyUnicode_Check(delobj)) {
2246 PyErr_SetString(PyExc_TypeError,
2247 "deletions are implemented differently for unicode");
2248 return NULL;
2249 }
2250#endif
2251 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2252 return NULL;
2253 }
2254 else {
2255 del_table = NULL;
2256 dellen = 0;
2257 }
2258
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002259 inlen = PyString_GET_SIZE(input_obj);
2260 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002261 if (result == NULL)
2262 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002263 output_start = output = PyString_AsString(result);
2264 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002265
2266 if (dellen == 0 && table != NULL) {
2267 /* If no deletions are required, use faster code */
2268 for (i = inlen; --i >= 0; ) {
2269 c = Py_CHARMASK(*input++);
2270 if (Py_CHARMASK((*output++ = table[c])) != c)
2271 changed = 1;
2272 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002273 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002274 return result;
2275 Py_DECREF(result);
2276 Py_INCREF(input_obj);
2277 return input_obj;
2278 }
2279
2280 if (table == NULL) {
2281 for (i = 0; i < 256; i++)
2282 trans_table[i] = Py_CHARMASK(i);
2283 } else {
2284 for (i = 0; i < 256; i++)
2285 trans_table[i] = Py_CHARMASK(table[i]);
2286 }
2287
2288 for (i = 0; i < dellen; i++)
2289 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2290
2291 for (i = inlen; --i >= 0; ) {
2292 c = Py_CHARMASK(*input++);
2293 if (trans_table[c] != -1)
2294 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2295 continue;
2296 changed = 1;
2297 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002298 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002299 Py_DECREF(result);
2300 Py_INCREF(input_obj);
2301 return input_obj;
2302 }
2303 /* Fix the size of the resulting string */
2304 if (inlen > 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002305 _PyString_Resize(&result, output - output_start);
Christian Heimes44720832008-05-26 13:01:01 +00002306 return result;
2307}
2308
2309
/* find and count characters and substrings */

/* Pointer to the first occurrence of byte `c` within the first
   `target_len` bytes of `target`, or NULL if there is none (a typed
   wrapper around memchr()). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2314
2315/* String ops must return a string. */
2316/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002317Py_LOCAL(PyStringObject *)
2318return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002319{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002320 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002321 Py_INCREF(self);
2322 return self;
2323 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002324 return (PyStringObject *)PyString_FromStringAndSize(
2325 PyString_AS_STRING(self),
2326 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002327}
2328
2329Py_LOCAL_INLINE(Py_ssize_t)
2330countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2331{
2332 Py_ssize_t count=0;
2333 const char *start=target;
2334 const char *end=target+target_len;
2335
2336 while ( (start=findchar(start, end-start, c)) != NULL ) {
2337 count++;
2338 if (count >= maxcount)
2339 break;
2340 start += 1;
2341 }
2342 return count;
2343}
2344
Christian Heimes44720832008-05-26 13:01:01 +00002345
2346/* Algorithms for different cases of string replacement */
2347
2348/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002349Py_LOCAL(PyStringObject *)
2350replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002351 const char *to_s, Py_ssize_t to_len,
2352 Py_ssize_t maxcount)
2353{
2354 char *self_s, *result_s;
2355 Py_ssize_t self_len, result_len;
2356 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002357 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002358
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002359 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002360
2361 /* 1 at the end plus 1 after every character */
2362 count = self_len+1;
2363 if (maxcount < count)
2364 count = maxcount;
2365
2366 /* Check for overflow */
2367 /* result_len = count * to_len + self_len; */
2368 product = count * to_len;
2369 if (product / to_len != count) {
2370 PyErr_SetString(PyExc_OverflowError,
2371 "replace string is too long");
2372 return NULL;
2373 }
2374 result_len = product + self_len;
2375 if (result_len < 0) {
2376 PyErr_SetString(PyExc_OverflowError,
2377 "replace string is too long");
2378 return NULL;
2379 }
2380
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002381 if (! (result = (PyStringObject *)
2382 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002383 return NULL;
2384
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002385 self_s = PyString_AS_STRING(self);
2386 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002387
2388 /* TODO: special case single character, which doesn't need memcpy */
2389
2390 /* Lay the first one down (guaranteed this will occur) */
2391 Py_MEMCPY(result_s, to_s, to_len);
2392 result_s += to_len;
2393 count -= 1;
2394
2395 for (i=0; i<count; i++) {
2396 *result_s++ = *self_s++;
2397 Py_MEMCPY(result_s, to_s, to_len);
2398 result_s += to_len;
2399 }
2400
2401 /* Copy the rest of the original string */
2402 Py_MEMCPY(result_s, self_s, self_len-i);
2403
2404 return result;
2405}
2406
2407/* Special case for deleting a single character */
2408/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002409Py_LOCAL(PyStringObject *)
2410replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002411 char from_c, Py_ssize_t maxcount)
2412{
2413 char *self_s, *result_s;
2414 char *start, *next, *end;
2415 Py_ssize_t self_len, result_len;
2416 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002417 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002418
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002419 self_len = PyString_GET_SIZE(self);
2420 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002421
2422 count = countchar(self_s, self_len, from_c, maxcount);
2423 if (count == 0) {
2424 return return_self(self);
2425 }
2426
2427 result_len = self_len - count; /* from_len == 1 */
2428 assert(result_len>=0);
2429
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002430 if ( (result = (PyStringObject *)
2431 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002432 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002433 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002434
2435 start = self_s;
2436 end = self_s + self_len;
2437 while (count-- > 0) {
2438 next = findchar(start, end-start, from_c);
2439 if (next == NULL)
2440 break;
2441 Py_MEMCPY(result_s, start, next-start);
2442 result_s += (next-start);
2443 start = next+1;
2444 }
2445 Py_MEMCPY(result_s, start, end-start);
2446
2447 return result;
2448}
2449
2450/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2451
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002452Py_LOCAL(PyStringObject *)
2453replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002454 const char *from_s, Py_ssize_t from_len,
2455 Py_ssize_t maxcount) {
2456 char *self_s, *result_s;
2457 char *start, *next, *end;
2458 Py_ssize_t self_len, result_len;
2459 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002460 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002462 self_len = PyString_GET_SIZE(self);
2463 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002464
Antoine Pitrou64672132010-01-13 07:55:48 +00002465 count = stringlib_count(self_s, self_len,
2466 from_s, from_len,
2467 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002468
2469 if (count == 0) {
2470 /* no matches */
2471 return return_self(self);
2472 }
2473
2474 result_len = self_len - (count * from_len);
2475 assert (result_len>=0);
2476
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002477 if ( (result = (PyStringObject *)
2478 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002479 return NULL;
2480
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002481 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002482
2483 start = self_s;
2484 end = self_s + self_len;
2485 while (count-- > 0) {
Antoine Pitrou64672132010-01-13 07:55:48 +00002486 offset = stringlib_find(start, end-start,
2487 from_s, from_len,
2488 0);
Christian Heimes44720832008-05-26 13:01:01 +00002489 if (offset == -1)
2490 break;
2491 next = start + offset;
2492
2493 Py_MEMCPY(result_s, start, next-start);
2494
2495 result_s += (next-start);
2496 start = next+from_len;
2497 }
2498 Py_MEMCPY(result_s, start, end-start);
2499 return result;
2500}
2501
2502/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002503Py_LOCAL(PyStringObject *)
2504replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002505 char from_c, char to_c,
2506 Py_ssize_t maxcount)
2507{
2508 char *self_s, *result_s, *start, *end, *next;
2509 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002510 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002511
2512 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002513 self_s = PyString_AS_STRING(self);
2514 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002515
2516 next = findchar(self_s, self_len, from_c);
2517
2518 if (next == NULL) {
2519 /* No matches; return the original string */
2520 return return_self(self);
2521 }
2522
2523 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002524 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002525 if (result == NULL)
2526 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002527 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002528 Py_MEMCPY(result_s, self_s, self_len);
2529
2530 /* change everything in-place, starting with this one */
2531 start = result_s + (next-self_s);
2532 *start = to_c;
2533 start++;
2534 end = result_s + self_len;
2535
2536 while (--maxcount > 0) {
2537 next = findchar(start, end-start, from_c);
2538 if (next == NULL)
2539 break;
2540 *next = to_c;
2541 start = next+1;
2542 }
2543
2544 return result;
2545}
2546
2547/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002548Py_LOCAL(PyStringObject *)
2549replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002550 const char *from_s, Py_ssize_t from_len,
2551 const char *to_s, Py_ssize_t to_len,
2552 Py_ssize_t maxcount)
2553{
2554 char *result_s, *start, *end;
2555 char *self_s;
2556 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002557 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002558
2559 /* The result string will be the same size */
2560
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002561 self_s = PyString_AS_STRING(self);
2562 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002563
Antoine Pitrou64672132010-01-13 07:55:48 +00002564 offset = stringlib_find(self_s, self_len,
2565 from_s, from_len,
2566 0);
Christian Heimes44720832008-05-26 13:01:01 +00002567 if (offset == -1) {
2568 /* No matches; return the original string */
2569 return return_self(self);
2570 }
2571
2572 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002573 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002574 if (result == NULL)
2575 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002576 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002577 Py_MEMCPY(result_s, self_s, self_len);
2578
2579 /* change everything in-place, starting with this one */
2580 start = result_s + offset;
2581 Py_MEMCPY(start, to_s, from_len);
2582 start += from_len;
2583 end = result_s + self_len;
2584
2585 while ( --maxcount > 0) {
Antoine Pitrou64672132010-01-13 07:55:48 +00002586 offset = stringlib_find(start, end-start,
2587 from_s, from_len,
2588 0);
Christian Heimes44720832008-05-26 13:01:01 +00002589 if (offset==-1)
2590 break;
2591 Py_MEMCPY(start+offset, to_s, from_len);
2592 start += offset+from_len;
2593 }
2594
2595 return result;
2596}
2597
2598/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002599Py_LOCAL(PyStringObject *)
2600replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002601 char from_c,
2602 const char *to_s, Py_ssize_t to_len,
2603 Py_ssize_t maxcount)
2604{
2605 char *self_s, *result_s;
2606 char *start, *next, *end;
2607 Py_ssize_t self_len, result_len;
2608 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002609 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002610
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002611 self_s = PyString_AS_STRING(self);
2612 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002613
2614 count = countchar(self_s, self_len, from_c, maxcount);
2615 if (count == 0) {
2616 /* no matches, return unchanged */
2617 return return_self(self);
2618 }
2619
2620 /* use the difference between current and new, hence the "-1" */
2621 /* result_len = self_len + count * (to_len-1) */
2622 product = count * (to_len-1);
2623 if (product / (to_len-1) != count) {
2624 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2625 return NULL;
2626 }
2627 result_len = self_len + product;
2628 if (result_len < 0) {
2629 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2630 return NULL;
2631 }
2632
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002633 if ( (result = (PyStringObject *)
2634 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002635 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002636 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002637
2638 start = self_s;
2639 end = self_s + self_len;
2640 while (count-- > 0) {
2641 next = findchar(start, end-start, from_c);
2642 if (next == NULL)
2643 break;
2644
2645 if (next == start) {
2646 /* replace with the 'to' */
2647 Py_MEMCPY(result_s, to_s, to_len);
2648 result_s += to_len;
2649 start += 1;
2650 } else {
2651 /* copy the unchanged old then the 'to' */
2652 Py_MEMCPY(result_s, start, next-start);
2653 result_s += (next-start);
2654 Py_MEMCPY(result_s, to_s, to_len);
2655 result_s += to_len;
2656 start = next+1;
2657 }
2658 }
2659 /* Copy the remainder of the remaining string */
2660 Py_MEMCPY(result_s, start, end-start);
2661
2662 return result;
2663}
2664
2665/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002666Py_LOCAL(PyStringObject *)
2667replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002668 const char *from_s, Py_ssize_t from_len,
2669 const char *to_s, Py_ssize_t to_len,
2670 Py_ssize_t maxcount) {
2671 char *self_s, *result_s;
2672 char *start, *next, *end;
2673 Py_ssize_t self_len, result_len;
2674 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002675 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002676
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002677 self_s = PyString_AS_STRING(self);
2678 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002679
Antoine Pitrou64672132010-01-13 07:55:48 +00002680 count = stringlib_count(self_s, self_len,
2681 from_s, from_len,
2682 maxcount);
2683
Christian Heimes44720832008-05-26 13:01:01 +00002684 if (count == 0) {
2685 /* no matches, return unchanged */
2686 return return_self(self);
2687 }
2688
2689 /* Check for overflow */
2690 /* result_len = self_len + count * (to_len-from_len) */
2691 product = count * (to_len-from_len);
2692 if (product / (to_len-from_len) != count) {
2693 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2694 return NULL;
2695 }
2696 result_len = self_len + product;
2697 if (result_len < 0) {
2698 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2699 return NULL;
2700 }
2701
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002702 if ( (result = (PyStringObject *)
2703 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002704 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002705 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002706
2707 start = self_s;
2708 end = self_s + self_len;
2709 while (count-- > 0) {
Antoine Pitrou64672132010-01-13 07:55:48 +00002710 offset = stringlib_find(start, end-start,
2711 from_s, from_len,
2712 0);
Christian Heimes44720832008-05-26 13:01:01 +00002713 if (offset == -1)
2714 break;
2715 next = start+offset;
2716 if (next == start) {
2717 /* replace with the 'to' */
2718 Py_MEMCPY(result_s, to_s, to_len);
2719 result_s += to_len;
2720 start += from_len;
2721 } else {
2722 /* copy the unchanged old then the 'to' */
2723 Py_MEMCPY(result_s, start, next-start);
2724 result_s += (next-start);
2725 Py_MEMCPY(result_s, to_s, to_len);
2726 result_s += to_len;
2727 start = next+from_len;
2728 }
2729 }
2730 /* Copy the remainder of the remaining string */
2731 Py_MEMCPY(result_s, start, end-start);
2732
2733 return result;
2734}
2735
2736
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002737Py_LOCAL(PyStringObject *)
2738replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002739 const char *from_s, Py_ssize_t from_len,
2740 const char *to_s, Py_ssize_t to_len,
2741 Py_ssize_t maxcount)
2742{
2743 if (maxcount < 0) {
2744 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002745 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00002746 /* nothing to do; return the original string */
2747 return return_self(self);
2748 }
2749
2750 if (maxcount == 0 ||
2751 (from_len == 0 && to_len == 0)) {
2752 /* nothing to do; return the original string */
2753 return return_self(self);
2754 }
2755
2756 /* Handle zero-length special cases */
2757
2758 if (from_len == 0) {
2759 /* insert the 'to' string everywhere. */
2760 /* >>> "Python".replace("", ".") */
2761 /* '.P.y.t.h.o.n.' */
2762 return replace_interleave(self, to_s, to_len, maxcount);
2763 }
2764
2765 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2766 /* point for an empty self string to generate a non-empty string */
2767 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002768 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00002769 return return_self(self);
2770 }
2771
2772 if (to_len == 0) {
2773 /* delete all occurances of 'from' string */
2774 if (from_len == 1) {
2775 return replace_delete_single_character(
2776 self, from_s[0], maxcount);
2777 } else {
2778 return replace_delete_substring(self, from_s, from_len, maxcount);
2779 }
2780 }
2781
2782 /* Handle special case where both strings have the same length */
2783
2784 if (from_len == to_len) {
2785 if (from_len == 1) {
2786 return replace_single_character_in_place(
2787 self,
2788 from_s[0],
2789 to_s[0],
2790 maxcount);
2791 } else {
2792 return replace_substring_in_place(
2793 self, from_s, from_len, to_s, to_len, maxcount);
2794 }
2795 }
2796
2797 /* Otherwise use the more generic algorithms */
2798 if (from_len == 1) {
2799 return replace_single_character(self, from_s[0],
2800 to_s, to_len, maxcount);
2801 } else {
2802 /* len('from')>=2, len('to')>=1 */
2803 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2804 }
2805}
2806
2807PyDoc_STRVAR(replace__doc__,
2808"S.replace (old, new[, count]) -> string\n\
2809\n\
2810Return a copy of string S with all occurrences of substring\n\
2811old replaced by new. If the optional argument count is\n\
2812given, only the first count occurrences are replaced.");
2813
2814static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002815string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002816{
2817 Py_ssize_t count = -1;
2818 PyObject *from, *to;
2819 const char *from_s, *to_s;
2820 Py_ssize_t from_len, to_len;
2821
2822 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2823 return NULL;
2824
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002825 if (PyString_Check(from)) {
2826 from_s = PyString_AS_STRING(from);
2827 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00002828 }
2829#ifdef Py_USING_UNICODE
2830 if (PyUnicode_Check(from))
2831 return PyUnicode_Replace((PyObject *)self,
2832 from, to, count);
2833#endif
2834 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2835 return NULL;
2836
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002837 if (PyString_Check(to)) {
2838 to_s = PyString_AS_STRING(to);
2839 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00002840 }
2841#ifdef Py_USING_UNICODE
2842 else if (PyUnicode_Check(to))
2843 return PyUnicode_Replace((PyObject *)self,
2844 from, to, count);
2845#endif
2846 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2847 return NULL;
2848
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002849 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00002850 from_s, from_len,
2851 to_s, to_len, count);
2852}
2853
2854/** End DALKE **/
2855
2856/* Matches the end (direction >= 0) or start (direction < 0) of self
2857 * against substr, using the start and end arguments. Returns
2858 * -1 on error, 0 if not found and 1 if found.
2859 */
2860Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002861_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00002862 Py_ssize_t end, int direction)
2863{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002864 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002865 Py_ssize_t slen;
2866 const char* sub;
2867 const char* str;
2868
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002869 if (PyString_Check(substr)) {
2870 sub = PyString_AS_STRING(substr);
2871 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00002872 }
2873#ifdef Py_USING_UNICODE
2874 else if (PyUnicode_Check(substr))
2875 return PyUnicode_Tailmatch((PyObject *)self,
2876 substr, start, end, direction);
2877#endif
2878 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2879 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002880 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002881
Antoine Pitrou64672132010-01-13 07:55:48 +00002882 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002883
2884 if (direction < 0) {
2885 /* startswith */
2886 if (start+slen > len)
2887 return 0;
2888 } else {
2889 /* endswith */
2890 if (end-start < slen || start > len)
2891 return 0;
2892
2893 if (end-slen > start)
2894 start = end - slen;
2895 }
2896 if (end-start >= slen)
2897 return ! memcmp(str+start, sub, slen);
2898 return 0;
2899}
2900
2901
2902PyDoc_STRVAR(startswith__doc__,
2903"S.startswith(prefix[, start[, end]]) -> bool\n\
2904\n\
2905Return True if S starts with the specified prefix, False otherwise.\n\
2906With optional start, test S beginning at that position.\n\
2907With optional end, stop comparing S at that position.\n\
2908prefix can also be a tuple of strings to try.");
2909
2910static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002911string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002912{
2913 Py_ssize_t start = 0;
2914 Py_ssize_t end = PY_SSIZE_T_MAX;
2915 PyObject *subobj;
2916 int result;
2917
2918 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2919 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2920 return NULL;
2921 if (PyTuple_Check(subobj)) {
2922 Py_ssize_t i;
2923 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2924 result = _string_tailmatch(self,
2925 PyTuple_GET_ITEM(subobj, i),
2926 start, end, -1);
2927 if (result == -1)
2928 return NULL;
2929 else if (result) {
2930 Py_RETURN_TRUE;
2931 }
2932 }
2933 Py_RETURN_FALSE;
2934 }
2935 result = _string_tailmatch(self, subobj, start, end, -1);
2936 if (result == -1)
2937 return NULL;
2938 else
2939 return PyBool_FromLong(result);
2940}
2941
2942
2943PyDoc_STRVAR(endswith__doc__,
2944"S.endswith(suffix[, start[, end]]) -> bool\n\
2945\n\
2946Return True if S ends with the specified suffix, False otherwise.\n\
2947With optional start, test S beginning at that position.\n\
2948With optional end, stop comparing S at that position.\n\
2949suffix can also be a tuple of strings to try.");
2950
2951static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002952string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002953{
2954 Py_ssize_t start = 0;
2955 Py_ssize_t end = PY_SSIZE_T_MAX;
2956 PyObject *subobj;
2957 int result;
2958
2959 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2960 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2961 return NULL;
2962 if (PyTuple_Check(subobj)) {
2963 Py_ssize_t i;
2964 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2965 result = _string_tailmatch(self,
2966 PyTuple_GET_ITEM(subobj, i),
2967 start, end, +1);
2968 if (result == -1)
2969 return NULL;
2970 else if (result) {
2971 Py_RETURN_TRUE;
2972 }
2973 }
2974 Py_RETURN_FALSE;
2975 }
2976 result = _string_tailmatch(self, subobj, start, end, +1);
2977 if (result == -1)
2978 return NULL;
2979 else
2980 return PyBool_FromLong(result);
2981}
2982
2983
2984PyDoc_STRVAR(encode__doc__,
2985"S.encode([encoding[,errors]]) -> object\n\
2986\n\
2987Encodes S using the codec registered for encoding. encoding defaults\n\
2988to the default encoding. errors may be given to set a different error\n\
2989handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2990a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2991'xmlcharrefreplace' as well as any other name registered with\n\
2992codecs.register_error that is able to handle UnicodeEncodeErrors.");
2993
2994static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00002995string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00002996{
Benjamin Peterson332d7212009-09-18 21:14:55 +00002997 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00002998 char *encoding = NULL;
2999 char *errors = NULL;
3000 PyObject *v;
3001
Benjamin Peterson332d7212009-09-18 21:14:55 +00003002 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3003 kwlist, &encoding, &errors))
Christian Heimes44720832008-05-26 13:01:01 +00003004 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003005 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003006 if (v == NULL)
3007 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003008 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003009 PyErr_Format(PyExc_TypeError,
3010 "encoder did not return a string/unicode object "
3011 "(type=%.400s)",
3012 Py_TYPE(v)->tp_name);
3013 Py_DECREF(v);
3014 return NULL;
3015 }
3016 return v;
3017
3018 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003019 return NULL;
3020}
3021
Christian Heimes44720832008-05-26 13:01:01 +00003022
3023PyDoc_STRVAR(decode__doc__,
3024"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003025\n\
Christian Heimes44720832008-05-26 13:01:01 +00003026Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003027to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003028handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3029a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003030as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003031able to handle UnicodeDecodeErrors.");
3032
3033static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003034string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003035{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003036 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003037 char *encoding = NULL;
3038 char *errors = NULL;
3039 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003040
Benjamin Peterson332d7212009-09-18 21:14:55 +00003041 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3042 kwlist, &encoding, &errors))
Christian Heimes1a6387e2008-03-26 12:49:49 +00003043 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003044 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003045 if (v == NULL)
3046 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003047 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003048 PyErr_Format(PyExc_TypeError,
3049 "decoder did not return a string/unicode object "
3050 "(type=%.400s)",
3051 Py_TYPE(v)->tp_name);
3052 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003053 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003054 }
3055 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003056
Christian Heimes44720832008-05-26 13:01:01 +00003057 onError:
3058 return NULL;
3059}
3060
3061
3062PyDoc_STRVAR(expandtabs__doc__,
3063"S.expandtabs([tabsize]) -> string\n\
3064\n\
3065Return a copy of S where all tab characters are expanded using spaces.\n\
3066If tabsize is not given, a tab size of 8 characters is assumed.");
3067
3068static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003069string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003070{
3071 const char *e, *p, *qe;
3072 char *q;
3073 Py_ssize_t i, j, incr;
3074 PyObject *u;
3075 int tabsize = 8;
3076
3077 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3078 return NULL;
3079
3080 /* First pass: determine size of output string */
3081 i = 0; /* chars up to and including most recent \n or \r */
3082 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003083 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3084 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003085 if (*p == '\t') {
3086 if (tabsize > 0) {
3087 incr = tabsize - (j % tabsize);
3088 if (j > PY_SSIZE_T_MAX - incr)
3089 goto overflow1;
3090 j += incr;
3091 }
3092 }
3093 else {
3094 if (j > PY_SSIZE_T_MAX - 1)
3095 goto overflow1;
3096 j++;
3097 if (*p == '\n' || *p == '\r') {
3098 if (i > PY_SSIZE_T_MAX - j)
3099 goto overflow1;
3100 i += j;
3101 j = 0;
3102 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003103 }
Christian Heimes44720832008-05-26 13:01:01 +00003104
3105 if (i > PY_SSIZE_T_MAX - j)
3106 goto overflow1;
3107
3108 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003109 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003110 if (!u)
3111 return NULL;
3112
3113 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003114 q = PyString_AS_STRING(u); /* next output char */
3115 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003116
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003117 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003118 if (*p == '\t') {
3119 if (tabsize > 0) {
3120 i = tabsize - (j % tabsize);
3121 j += i;
3122 while (i--) {
3123 if (q >= qe)
3124 goto overflow2;
3125 *q++ = ' ';
3126 }
3127 }
3128 }
3129 else {
3130 if (q >= qe)
3131 goto overflow2;
3132 *q++ = *p;
3133 j++;
3134 if (*p == '\n' || *p == '\r')
3135 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003136 }
Christian Heimes44720832008-05-26 13:01:01 +00003137
3138 return u;
3139
3140 overflow2:
3141 Py_DECREF(u);
3142 overflow1:
3143 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3144 return NULL;
3145}
3146
3147Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003148pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003149{
3150 PyObject *u;
3151
3152 if (left < 0)
3153 left = 0;
3154 if (right < 0)
3155 right = 0;
3156
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003157 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003158 Py_INCREF(self);
3159 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003160 }
3161
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003162 u = PyString_FromStringAndSize(NULL,
3163 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003164 if (u) {
3165 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003166 memset(PyString_AS_STRING(u), fill, left);
3167 Py_MEMCPY(PyString_AS_STRING(u) + left,
3168 PyString_AS_STRING(self),
3169 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003170 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003171 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003172 fill, right);
3173 }
3174
3175 return u;
3176}
3177
3178PyDoc_STRVAR(ljust__doc__,
3179"S.ljust(width[, fillchar]) -> string\n"
3180"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003181"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003182"done using the specified fill character (default is a space).");
3183
3184static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003185string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003186{
3187 Py_ssize_t width;
3188 char fillchar = ' ';
3189
3190 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3191 return NULL;
3192
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003193 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003194 Py_INCREF(self);
3195 return (PyObject*) self;
3196 }
3197
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003198 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003199}
3200
3201
3202PyDoc_STRVAR(rjust__doc__,
3203"S.rjust(width[, fillchar]) -> string\n"
3204"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003205"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003206"done using the specified fill character (default is a space)");
3207
3208static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003209string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003210{
3211 Py_ssize_t width;
3212 char fillchar = ' ';
3213
3214 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3215 return NULL;
3216
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003217 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003218 Py_INCREF(self);
3219 return (PyObject*) self;
3220 }
3221
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003222 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003223}
3224
3225
3226PyDoc_STRVAR(center__doc__,
3227"S.center(width[, fillchar]) -> string\n"
3228"\n"
3229"Return S centered in a string of length width. Padding is\n"
3230"done using the specified fill character (default is a space)");
3231
3232static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003233string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003234{
3235 Py_ssize_t marg, left;
3236 Py_ssize_t width;
3237 char fillchar = ' ';
3238
3239 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3240 return NULL;
3241
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003242 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003243 Py_INCREF(self);
3244 return (PyObject*) self;
3245 }
3246
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003247 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003248 left = marg / 2 + (marg & width & 1);
3249
3250 return pad(self, left, marg - left, fillchar);
3251}
3252
3253PyDoc_STRVAR(zfill__doc__,
3254"S.zfill(width) -> string\n"
3255"\n"
3256"Pad a numeric string S with zeros on the left, to fill a field\n"
3257"of the specified width. The string S is never truncated.");
3258
3259static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003260string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003261{
3262 Py_ssize_t fill;
3263 PyObject *s;
3264 char *p;
3265 Py_ssize_t width;
3266
3267 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3268 return NULL;
3269
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003270 if (PyString_GET_SIZE(self) >= width) {
3271 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003272 Py_INCREF(self);
3273 return (PyObject*) self;
3274 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003275 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003276 return PyString_FromStringAndSize(
3277 PyString_AS_STRING(self),
3278 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003279 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003280 }
3281
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003282 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003283
Christian Heimes44720832008-05-26 13:01:01 +00003284 s = pad(self, fill, 0, '0');
3285
3286 if (s == NULL)
3287 return NULL;
3288
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003289 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003290 if (p[fill] == '+' || p[fill] == '-') {
3291 /* move sign to beginning of string */
3292 p[0] = p[fill];
3293 p[fill] = '0';
3294 }
3295
3296 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003297}
3298
Christian Heimes44720832008-05-26 13:01:01 +00003299PyDoc_STRVAR(isspace__doc__,
3300"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003301\n\
Christian Heimes44720832008-05-26 13:01:01 +00003302Return True if all characters in S are whitespace\n\
3303and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003304
Christian Heimes44720832008-05-26 13:01:01 +00003305static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003306string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003307{
Christian Heimes44720832008-05-26 13:01:01 +00003308 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003309 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003310 register const unsigned char *e;
3311
3312 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003313 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003314 isspace(*p))
3315 return PyBool_FromLong(1);
3316
3317 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003318 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003319 return PyBool_FromLong(0);
3320
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003321 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003322 for (; p < e; p++) {
3323 if (!isspace(*p))
3324 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003325 }
Christian Heimes44720832008-05-26 13:01:01 +00003326 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003327}
3328
Christian Heimes44720832008-05-26 13:01:01 +00003329
3330PyDoc_STRVAR(isalpha__doc__,
3331"S.isalpha() -> bool\n\
3332\n\
3333Return True if all characters in S are alphabetic\n\
3334and there is at least one character in S, False otherwise.");
3335
3336static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003337string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003338{
Christian Heimes44720832008-05-26 13:01:01 +00003339 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003340 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003341 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003342
Christian Heimes44720832008-05-26 13:01:01 +00003343 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003344 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003345 isalpha(*p))
3346 return PyBool_FromLong(1);
3347
3348 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003349 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003350 return PyBool_FromLong(0);
3351
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003352 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003353 for (; p < e; p++) {
3354 if (!isalpha(*p))
3355 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003356 }
Christian Heimes44720832008-05-26 13:01:01 +00003357 return PyBool_FromLong(1);
3358}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003359
Christian Heimes44720832008-05-26 13:01:01 +00003360
3361PyDoc_STRVAR(isalnum__doc__,
3362"S.isalnum() -> bool\n\
3363\n\
3364Return True if all characters in S are alphanumeric\n\
3365and there is at least one character in S, False otherwise.");
3366
3367static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003368string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003369{
3370 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003371 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003372 register const unsigned char *e;
3373
3374 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003375 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003376 isalnum(*p))
3377 return PyBool_FromLong(1);
3378
3379 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003380 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003381 return PyBool_FromLong(0);
3382
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003383 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003384 for (; p < e; p++) {
3385 if (!isalnum(*p))
3386 return PyBool_FromLong(0);
3387 }
3388 return PyBool_FromLong(1);
3389}
3390
3391
3392PyDoc_STRVAR(isdigit__doc__,
3393"S.isdigit() -> bool\n\
3394\n\
3395Return True if all characters in S are digits\n\
3396and there is at least one character in S, False otherwise.");
3397
3398static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003399string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003400{
3401 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003402 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003403 register const unsigned char *e;
3404
3405 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003406 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003407 isdigit(*p))
3408 return PyBool_FromLong(1);
3409
3410 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003411 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003412 return PyBool_FromLong(0);
3413
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003414 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003415 for (; p < e; p++) {
3416 if (!isdigit(*p))
3417 return PyBool_FromLong(0);
3418 }
3419 return PyBool_FromLong(1);
3420}
3421
3422
3423PyDoc_STRVAR(islower__doc__,
3424"S.islower() -> bool\n\
3425\n\
3426Return True if all cased characters in S are lowercase and there is\n\
3427at least one cased character in S, False otherwise.");
3428
3429static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003430string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003431{
3432 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003433 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003434 register const unsigned char *e;
3435 int cased;
3436
3437 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003438 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003439 return PyBool_FromLong(islower(*p) != 0);
3440
3441 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003442 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003443 return PyBool_FromLong(0);
3444
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003445 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003446 cased = 0;
3447 for (; p < e; p++) {
3448 if (isupper(*p))
3449 return PyBool_FromLong(0);
3450 else if (!cased && islower(*p))
3451 cased = 1;
3452 }
3453 return PyBool_FromLong(cased);
3454}
3455
3456
3457PyDoc_STRVAR(isupper__doc__,
3458"S.isupper() -> bool\n\
3459\n\
3460Return True if all cased characters in S are uppercase and there is\n\
3461at least one cased character in S, False otherwise.");
3462
3463static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003464string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003465{
3466 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003467 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003468 register const unsigned char *e;
3469 int cased;
3470
3471 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003472 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003473 return PyBool_FromLong(isupper(*p) != 0);
3474
3475 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003476 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003477 return PyBool_FromLong(0);
3478
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003479 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003480 cased = 0;
3481 for (; p < e; p++) {
3482 if (islower(*p))
3483 return PyBool_FromLong(0);
3484 else if (!cased && isupper(*p))
3485 cased = 1;
3486 }
3487 return PyBool_FromLong(cased);
3488}
3489
3490
3491PyDoc_STRVAR(istitle__doc__,
3492"S.istitle() -> bool\n\
3493\n\
3494Return True if S is a titlecased string and there is at least one\n\
3495character in S, i.e. uppercase characters may only follow uncased\n\
3496characters and lowercase characters only cased ones. Return False\n\
3497otherwise.");
3498
3499static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003500string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003501{
3502 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003503 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003504 register const unsigned char *e;
3505 int cased, previous_is_cased;
3506
3507 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003508 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003509 return PyBool_FromLong(isupper(*p) != 0);
3510
3511 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003512 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003513 return PyBool_FromLong(0);
3514
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003515 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003516 cased = 0;
3517 previous_is_cased = 0;
3518 for (; p < e; p++) {
3519 register const unsigned char ch = *p;
3520
3521 if (isupper(ch)) {
3522 if (previous_is_cased)
3523 return PyBool_FromLong(0);
3524 previous_is_cased = 1;
3525 cased = 1;
3526 }
3527 else if (islower(ch)) {
3528 if (!previous_is_cased)
3529 return PyBool_FromLong(0);
3530 previous_is_cased = 1;
3531 cased = 1;
3532 }
3533 else
3534 previous_is_cased = 0;
3535 }
3536 return PyBool_FromLong(cased);
3537}
3538
3539
3540PyDoc_STRVAR(splitlines__doc__,
3541"S.splitlines([keepends]) -> list of strings\n\
3542\n\
3543Return a list of the lines in S, breaking at line boundaries.\n\
3544Line breaks are not included in the resulting list unless keepends\n\
3545is given and true.");
3546
3547static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003548string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003549{
Christian Heimes44720832008-05-26 13:01:01 +00003550 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003551
3552 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3553 return NULL;
3554
Antoine Pitrou64672132010-01-13 07:55:48 +00003555 return stringlib_splitlines(
3556 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3557 keepends
3558 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003559}
3560
Robert Schuppenies51df0642008-06-01 16:16:17 +00003561PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003562"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003563
3564static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003565string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003566{
3567 Py_ssize_t res;
Benjamin Peterson4fe03352009-09-17 21:33:46 +00003568 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00003569 return PyInt_FromSsize_t(res);
3570}
3571
Christian Heimes1a6387e2008-03-26 12:49:49 +00003572static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003573string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003574{
Christian Heimes44720832008-05-26 13:01:01 +00003575 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003576}
3577
Christian Heimes1a6387e2008-03-26 12:49:49 +00003578
Christian Heimes44720832008-05-26 13:01:01 +00003579#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003580
Christian Heimes44720832008-05-26 13:01:01 +00003581PyDoc_STRVAR(format__doc__,
3582"S.format(*args, **kwargs) -> unicode\n\
3583\n\
3584");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003585
Eric Smithdc13b792008-05-30 18:10:04 +00003586static PyObject *
3587string__format__(PyObject* self, PyObject* args)
3588{
3589 PyObject *format_spec;
3590 PyObject *result = NULL;
3591 PyObject *tmp = NULL;
3592
3593 /* If 2.x, convert format_spec to the same type as value */
3594 /* This is to allow things like u''.format('') */
3595 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3596 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003597 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00003598 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3599 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3600 goto done;
3601 }
3602 tmp = PyObject_Str(format_spec);
3603 if (tmp == NULL)
3604 goto done;
3605 format_spec = tmp;
3606
3607 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003608 PyString_AS_STRING(format_spec),
3609 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003610done:
3611 Py_XDECREF(tmp);
3612 return result;
3613}
3614
Christian Heimes44720832008-05-26 13:01:01 +00003615PyDoc_STRVAR(p_format__doc__,
3616"S.__format__(format_spec) -> unicode\n\
3617\n\
3618");
3619
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003620
Christian Heimes1a6387e2008-03-26 12:49:49 +00003621static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003622string_methods[] = {
3623 /* Counterparts of the obsolete stropmodule functions; except
3624 string.maketrans(). */
3625 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3626 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3627 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3628 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3629 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3630 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3631 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3632 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3633 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3634 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3635 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3636 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3637 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3638 capitalize__doc__},
3639 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3640 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3641 endswith__doc__},
3642 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3643 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3644 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3645 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3646 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3647 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3648 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3649 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3650 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3651 rpartition__doc__},
3652 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3653 startswith__doc__},
3654 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3655 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3656 swapcase__doc__},
3657 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3658 translate__doc__},
3659 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3660 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3661 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3662 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3663 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3664 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3665 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3666 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3667 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Benjamin Peterson332d7212009-09-18 21:14:55 +00003668 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3669 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00003670 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3671 expandtabs__doc__},
3672 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3673 splitlines__doc__},
Robert Schuppenies51df0642008-06-01 16:16:17 +00003674 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3675 sizeof__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00003676 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3677 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003678};
3679
3680static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003681str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003682
Christian Heimes44720832008-05-26 13:01:01 +00003683static PyObject *
3684string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3685{
3686 PyObject *x = NULL;
3687 static char *kwlist[] = {"object", 0};
3688
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003689 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00003690 return str_subtype_new(type, args, kwds);
3691 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3692 return NULL;
3693 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003694 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00003695 return PyObject_Str(x);
3696}
3697
3698static PyObject *
3699str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3700{
3701 PyObject *tmp, *pnew;
3702 Py_ssize_t n;
3703
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003704 assert(PyType_IsSubtype(type, &PyString_Type));
3705 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00003706 if (tmp == NULL)
3707 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003708 assert(PyString_CheckExact(tmp));
3709 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00003710 pnew = type->tp_alloc(type, n);
3711 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003712 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3713 ((PyStringObject *)pnew)->ob_shash =
3714 ((PyStringObject *)tmp)->ob_shash;
3715 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00003716 }
3717 Py_DECREF(tmp);
3718 return pnew;
3719}
3720
3721static PyObject *
3722basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3723{
3724 PyErr_SetString(PyExc_TypeError,
3725 "The basestring type cannot be instantiated");
3726 return NULL;
3727}
3728
3729static PyObject *
3730string_mod(PyObject *v, PyObject *w)
3731{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003732 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003733 Py_INCREF(Py_NotImplemented);
3734 return Py_NotImplemented;
3735 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003736 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003737}
3738
3739PyDoc_STRVAR(basestring_doc,
3740"Type basestring cannot be instantiated; it is the base for str and unicode.");
3741
3742static PyNumberMethods string_as_number = {
3743 0, /*nb_add*/
3744 0, /*nb_subtract*/
3745 0, /*nb_multiply*/
3746 0, /*nb_divide*/
3747 string_mod, /*nb_remainder*/
3748};
3749
3750
3751PyTypeObject PyBaseString_Type = {
3752 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3753 "basestring",
3754 0,
3755 0,
3756 0, /* tp_dealloc */
3757 0, /* tp_print */
3758 0, /* tp_getattr */
3759 0, /* tp_setattr */
3760 0, /* tp_compare */
3761 0, /* tp_repr */
3762 0, /* tp_as_number */
3763 0, /* tp_as_sequence */
3764 0, /* tp_as_mapping */
3765 0, /* tp_hash */
3766 0, /* tp_call */
3767 0, /* tp_str */
3768 0, /* tp_getattro */
3769 0, /* tp_setattro */
3770 0, /* tp_as_buffer */
3771 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3772 basestring_doc, /* tp_doc */
3773 0, /* tp_traverse */
3774 0, /* tp_clear */
3775 0, /* tp_richcompare */
3776 0, /* tp_weaklistoffset */
3777 0, /* tp_iter */
3778 0, /* tp_iternext */
3779 0, /* tp_methods */
3780 0, /* tp_members */
3781 0, /* tp_getset */
3782 &PyBaseObject_Type, /* tp_base */
3783 0, /* tp_dict */
3784 0, /* tp_descr_get */
3785 0, /* tp_descr_set */
3786 0, /* tp_dictoffset */
3787 0, /* tp_init */
3788 0, /* tp_alloc */
3789 basestring_new, /* tp_new */
3790 0, /* tp_free */
3791};
3792
3793PyDoc_STRVAR(string_doc,
3794"str(object) -> string\n\
3795\n\
3796Return a nice string representation of the object.\n\
3797If the argument is a string, the return value is the same object.");
3798
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003799PyTypeObject PyString_Type = {
Christian Heimes44720832008-05-26 13:01:01 +00003800 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3801 "str",
Mark Dickinson826f3fe2008-12-05 21:55:28 +00003802 PyStringObject_SIZE,
Christian Heimes44720832008-05-26 13:01:01 +00003803 sizeof(char),
3804 string_dealloc, /* tp_dealloc */
3805 (printfunc)string_print, /* tp_print */
3806 0, /* tp_getattr */
3807 0, /* tp_setattr */
3808 0, /* tp_compare */
3809 string_repr, /* tp_repr */
3810 &string_as_number, /* tp_as_number */
3811 &string_as_sequence, /* tp_as_sequence */
3812 &string_as_mapping, /* tp_as_mapping */
3813 (hashfunc)string_hash, /* tp_hash */
3814 0, /* tp_call */
3815 string_str, /* tp_str */
3816 PyObject_GenericGetAttr, /* tp_getattro */
3817 0, /* tp_setattro */
3818 &string_as_buffer, /* tp_as_buffer */
3819 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3820 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3821 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3822 string_doc, /* tp_doc */
3823 0, /* tp_traverse */
3824 0, /* tp_clear */
3825 (richcmpfunc)string_richcompare, /* tp_richcompare */
3826 0, /* tp_weaklistoffset */
3827 0, /* tp_iter */
3828 0, /* tp_iternext */
3829 string_methods, /* tp_methods */
3830 0, /* tp_members */
3831 0, /* tp_getset */
3832 &PyBaseString_Type, /* tp_base */
3833 0, /* tp_dict */
3834 0, /* tp_descr_get */
3835 0, /* tp_descr_set */
3836 0, /* tp_dictoffset */
3837 0, /* tp_init */
3838 0, /* tp_alloc */
3839 string_new, /* tp_new */
3840 PyObject_Del, /* tp_free */
3841};
3842
3843void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003844PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003845{
3846 register PyObject *v;
3847 if (*pv == NULL)
3848 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003849 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00003850 Py_DECREF(*pv);
3851 *pv = NULL;
3852 return;
3853 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003854 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00003855 Py_DECREF(*pv);
3856 *pv = v;
3857}
3858
3859void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003860PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003861{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003862 PyString_Concat(pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00003863 Py_XDECREF(w);
3864}
3865
3866
3867/* The following function breaks the notion that strings are immutable:
3868 it changes the size of a string. We get away with this only if there
3869 is only one module referencing the object. You can also think of it
3870 as creating a new string object and destroying the old one, only
3871 more efficiently. In any case, don't use this if the string may
3872 already be known to some other part of the code...
3873 Note that if there's not enough memory to resize the string, the original
3874 string object at *pv is deallocated, *pv is set to NULL, an "out of
3875 memory" exception is set, and -1 is returned. Else (on success) 0 is
3876 returned, and the value in *pv may or may not be the same as on input.
3877 As always, an extra byte is allocated for a trailing \0 byte (newsize
3878 does *not* include that), and a trailing \0 byte is stored.
3879*/
3880
3881int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003882_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003883{
3884 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003885 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00003886 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003887 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3888 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003889 *pv = 0;
3890 Py_DECREF(v);
3891 PyErr_BadInternalCall();
3892 return -1;
3893 }
3894 /* XXX UNREF/NEWREF interface should be more symmetrical */
3895 _Py_DEC_REFTOTAL;
3896 _Py_ForgetReference(v);
3897 *pv = (PyObject *)
Mark Dickinson826f3fe2008-12-05 21:55:28 +00003898 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00003899 if (*pv == NULL) {
3900 PyObject_Del(v);
3901 PyErr_NoMemory();
3902 return -1;
3903 }
3904 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003905 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00003906 Py_SIZE(sv) = newsize;
3907 sv->ob_sval[newsize] = '\0';
3908 sv->ob_shash = -1; /* invalidate cached hash value */
3909 return 0;
3910}
3911
3912/* Helpers for formatstring */
3913
3914Py_LOCAL_INLINE(PyObject *)
3915getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3916{
3917 Py_ssize_t argidx = *p_argidx;
3918 if (argidx < arglen) {
3919 (*p_argidx)++;
3920 if (arglen < 0)
3921 return args;
3922 else
3923 return PyTuple_GetItem(args, argidx);
3924 }
3925 PyErr_SetString(PyExc_TypeError,
3926 "not enough arguments for format string");
3927 return NULL;
3928}
3929
/* Bit flags recording which conversion flags appeared in a % format
 * specifier; each flag character maps to one bit:
 *   '-'  F_LJUST
 *   '+'  F_SIGN
 *   ' '  F_BLANK
 *   '#'  F_ALT
 *   '0'  F_ZERO
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
3942
Mark Dickinson18cfada2009-11-23 18:46:41 +00003943/* Returns a new reference to a PyString object, or NULL on failure. */
3944
3945static PyObject *
3946formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003947{
Mark Dickinson18cfada2009-11-23 18:46:41 +00003948 char *p;
3949 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003950 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003951
Christian Heimes44720832008-05-26 13:01:01 +00003952 x = PyFloat_AsDouble(v);
3953 if (x == -1.0 && PyErr_Occurred()) {
3954 PyErr_Format(PyExc_TypeError, "float argument required, "
3955 "not %.200s", Py_TYPE(v)->tp_name);
Mark Dickinson18cfada2009-11-23 18:46:41 +00003956 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003957 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003958
Christian Heimes44720832008-05-26 13:01:01 +00003959 if (prec < 0)
3960 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003961
Mark Dickinson18cfada2009-11-23 18:46:41 +00003962 p = PyOS_double_to_string(x, type, prec,
3963 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003964
Mark Dickinson18cfada2009-11-23 18:46:41 +00003965 if (p == NULL)
3966 return NULL;
3967 result = PyString_FromStringAndSize(p, strlen(p));
3968 PyMem_Free(p);
3969 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003970}
3971
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003972/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003973 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3974 * Python's regular ints.
3975 * Return value: a new PyString*, or NULL if error.
3976 * . *pbuf is set to point into it,
3977 * *plen set to the # of chars following that.
3978 * Caller must decref it when done using pbuf.
3979 * The string starting at *pbuf is of the form
3980 * "-"? ("0x" | "0X")? digit+
3981 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3982 * set in flags. The case of hex digits will be correct,
3983 * There will be at least prec digits, zero-filled on the left if
3984 * necessary to get that many.
3985 * val object to be converted
3986 * flags bitmask of format flags; only F_ALT is looked at
3987 * prec minimum number of digits; 0-fill on left if needed
3988 * type a character in [duoxX]; u acts the same as d
3989 *
3990 * CAUTION: o, x and X conversions on regular ints can never
3991 * produce a '-' sign, but can for Python's unbounded ints.
3992 */
3993PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003994_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00003995 char **pbuf, int *plen)
3996{
3997 PyObject *result = NULL;
3998 char *buf;
3999 Py_ssize_t i;
4000 int sign; /* 1 if '-', else 0 */
4001 int len; /* number of characters */
4002 Py_ssize_t llen;
4003 int numdigits; /* len == numnondigits + numdigits */
4004 int numnondigits = 0;
4005
4006 switch (type) {
4007 case 'd':
4008 case 'u':
4009 result = Py_TYPE(val)->tp_str(val);
4010 break;
4011 case 'o':
4012 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4013 break;
4014 case 'x':
4015 case 'X':
4016 numnondigits = 2;
4017 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4018 break;
4019 default:
4020 assert(!"'type' not in [duoxX]");
4021 }
4022 if (!result)
4023 return NULL;
4024
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004025 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004026 if (!buf) {
4027 Py_DECREF(result);
4028 return NULL;
4029 }
4030
4031 /* To modify the string in-place, there can only be one reference. */
4032 if (Py_REFCNT(result) != 1) {
4033 PyErr_BadInternalCall();
4034 return NULL;
4035 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004036 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004037 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004038 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004039 return NULL;
4040 }
4041 len = (int)llen;
4042 if (buf[len-1] == 'L') {
4043 --len;
4044 buf[len] = '\0';
4045 }
4046 sign = buf[0] == '-';
4047 numnondigits += sign;
4048 numdigits = len - numnondigits;
4049 assert(numdigits > 0);
4050
4051 /* Get rid of base marker unless F_ALT */
4052 if ((flags & F_ALT) == 0) {
4053 /* Need to skip 0x, 0X or 0. */
4054 int skipped = 0;
4055 switch (type) {
4056 case 'o':
4057 assert(buf[sign] == '0');
4058 /* If 0 is only digit, leave it alone. */
4059 if (numdigits > 1) {
4060 skipped = 1;
4061 --numdigits;
4062 }
4063 break;
4064 case 'x':
4065 case 'X':
4066 assert(buf[sign] == '0');
4067 assert(buf[sign + 1] == 'x');
4068 skipped = 2;
4069 numnondigits -= 2;
4070 break;
4071 }
4072 if (skipped) {
4073 buf += skipped;
4074 len -= skipped;
4075 if (sign)
4076 buf[0] = '-';
4077 }
4078 assert(len == numnondigits + numdigits);
4079 assert(numdigits > 0);
4080 }
4081
4082 /* Fill with leading zeroes to meet minimum width. */
4083 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004084 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004085 numnondigits + prec);
4086 char *b1;
4087 if (!r1) {
4088 Py_DECREF(result);
4089 return NULL;
4090 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004091 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004092 for (i = 0; i < numnondigits; ++i)
4093 *b1++ = *buf++;
4094 for (i = 0; i < prec - numdigits; i++)
4095 *b1++ = '0';
4096 for (i = 0; i < numdigits; i++)
4097 *b1++ = *buf++;
4098 *b1 = '\0';
4099 Py_DECREF(result);
4100 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004101 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004102 len = numnondigits + prec;
4103 }
4104
4105 /* Fix up case for hex conversions. */
4106 if (type == 'X') {
4107 /* Need to convert all lower case letters to upper case.
4108 and need to convert 0x to 0X (and -0x to -0X). */
4109 for (i = 0; i < len; i++)
4110 if (buf[i] >= 'a' && buf[i] <= 'x')
4111 buf[i] -= 'a'-'A';
4112 }
4113 *pbuf = buf;
4114 *plen = len;
4115 return result;
4116}
4117
4118Py_LOCAL_INLINE(int)
4119formatint(char *buf, size_t buflen, int flags,
4120 int prec, int type, PyObject *v)
4121{
4122 /* fmt = '%#.' + `prec` + 'l' + `type`
4123 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4124 + 1 + 1 = 24 */
4125 char fmt[64]; /* plenty big enough! */
4126 char *sign;
4127 long x;
4128
4129 x = PyInt_AsLong(v);
4130 if (x == -1 && PyErr_Occurred()) {
4131 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4132 Py_TYPE(v)->tp_name);
4133 return -1;
4134 }
4135 if (x < 0 && type == 'u') {
4136 type = 'd';
4137 }
4138 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4139 sign = "-";
4140 else
4141 sign = "";
4142 if (prec < 0)
4143 prec = 1;
4144
4145 if ((flags & F_ALT) &&
4146 (type == 'x' || type == 'X')) {
4147 /* When converting under %#x or %#X, there are a number
4148 * of issues that cause pain:
4149 * - when 0 is being converted, the C standard leaves off
4150 * the '0x' or '0X', which is inconsistent with other
4151 * %#x/%#X conversions and inconsistent with Python's
4152 * hex() function
4153 * - there are platforms that violate the standard and
4154 * convert 0 with the '0x' or '0X'
4155 * (Metrowerks, Compaq Tru64)
4156 * - there are platforms that give '0x' when converting
4157 * under %#X, but convert 0 in accordance with the
4158 * standard (OS/2 EMX)
4159 *
4160 * We can achieve the desired consistency by inserting our
4161 * own '0x' or '0X' prefix, and substituting %x/%X in place
4162 * of %#x/%#X.
4163 *
4164 * Note that this is the same approach as used in
4165 * formatint() in unicodeobject.c
4166 */
4167 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4168 sign, type, prec, type);
4169 }
4170 else {
4171 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4172 sign, (flags&F_ALT) ? "#" : "",
4173 prec, type);
4174 }
4175
4176 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4177 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4178 */
4179 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4180 PyErr_SetString(PyExc_OverflowError,
4181 "formatted integer is too long (precision too large?)");
4182 return -1;
4183 }
4184 if (sign[0])
4185 PyOS_snprintf(buf, buflen, fmt, -x);
4186 else
4187 PyOS_snprintf(buf, buflen, fmt, x);
4188 return (int)strlen(buf);
4189}
4190
4191Py_LOCAL_INLINE(int)
4192formatchar(char *buf, size_t buflen, PyObject *v)
4193{
4194 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004195 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004196 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4197 return -1;
4198 }
4199 else {
4200 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4201 return -1;
4202 }
4203 buf[1] = '\0';
4204 return 1;
4205}
4206
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is parenthesized so that the cast cannot bind to
   whatever follows the macro at its point of use.
*/
#define FORMATBUFLEN ((size_t)120)
4216
4217PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004218PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004219{
4220 char *fmt, *res;
4221 Py_ssize_t arglen, argidx;
4222 Py_ssize_t reslen, rescnt, fmtcnt;
4223 int args_owned = 0;
4224 PyObject *result, *orig_args;
4225#ifdef Py_USING_UNICODE
4226 PyObject *v, *w;
4227#endif
4228 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004229 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004230 PyErr_BadInternalCall();
4231 return NULL;
4232 }
4233 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004234 fmt = PyString_AS_STRING(format);
4235 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004236 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004237 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004238 if (result == NULL)
4239 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004240 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004241 if (PyTuple_Check(args)) {
4242 arglen = PyTuple_GET_SIZE(args);
4243 argidx = 0;
4244 }
4245 else {
4246 arglen = -1;
4247 argidx = -2;
4248 }
4249 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4250 !PyObject_TypeCheck(args, &PyBaseString_Type))
4251 dict = args;
4252 while (--fmtcnt >= 0) {
4253 if (*fmt != '%') {
4254 if (--rescnt < 0) {
4255 rescnt = fmtcnt + 100;
4256 reslen += rescnt;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004257 if (_PyString_Resize(&result, reslen) < 0)
Christian Heimes44720832008-05-26 13:01:01 +00004258 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004259 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004260 + reslen - rescnt;
4261 --rescnt;
4262 }
4263 *res++ = *fmt++;
4264 }
4265 else {
4266 /* Got a format specifier */
4267 int flags = 0;
4268 Py_ssize_t width = -1;
4269 int prec = -1;
4270 int c = '\0';
4271 int fill;
4272 int isnumok;
4273 PyObject *v = NULL;
4274 PyObject *temp = NULL;
4275 char *pbuf;
4276 int sign;
4277 Py_ssize_t len;
4278 char formatbuf[FORMATBUFLEN];
Mark Dickinson18cfada2009-11-23 18:46:41 +00004279 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004280#ifdef Py_USING_UNICODE
4281 char *fmt_start = fmt;
4282 Py_ssize_t argidx_start = argidx;
4283#endif
4284
4285 fmt++;
4286 if (*fmt == '(') {
4287 char *keystart;
4288 Py_ssize_t keylen;
4289 PyObject *key;
4290 int pcount = 1;
4291
4292 if (dict == NULL) {
4293 PyErr_SetString(PyExc_TypeError,
4294 "format requires a mapping");
4295 goto error;
4296 }
4297 ++fmt;
4298 --fmtcnt;
4299 keystart = fmt;
4300 /* Skip over balanced parentheses */
4301 while (pcount > 0 && --fmtcnt >= 0) {
4302 if (*fmt == ')')
4303 --pcount;
4304 else if (*fmt == '(')
4305 ++pcount;
4306 fmt++;
4307 }
4308 keylen = fmt - keystart - 1;
4309 if (fmtcnt < 0 || pcount > 0) {
4310 PyErr_SetString(PyExc_ValueError,
4311 "incomplete format key");
4312 goto error;
4313 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004314 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004315 keylen);
4316 if (key == NULL)
4317 goto error;
4318 if (args_owned) {
4319 Py_DECREF(args);
4320 args_owned = 0;
4321 }
4322 args = PyObject_GetItem(dict, key);
4323 Py_DECREF(key);
4324 if (args == NULL) {
4325 goto error;
4326 }
4327 args_owned = 1;
4328 arglen = -1;
4329 argidx = -2;
4330 }
4331 while (--fmtcnt >= 0) {
4332 switch (c = *fmt++) {
4333 case '-': flags |= F_LJUST; continue;
4334 case '+': flags |= F_SIGN; continue;
4335 case ' ': flags |= F_BLANK; continue;
4336 case '#': flags |= F_ALT; continue;
4337 case '0': flags |= F_ZERO; continue;
4338 }
4339 break;
4340 }
4341 if (c == '*') {
4342 v = getnextarg(args, arglen, &argidx);
4343 if (v == NULL)
4344 goto error;
4345 if (!PyInt_Check(v)) {
4346 PyErr_SetString(PyExc_TypeError,
4347 "* wants int");
4348 goto error;
4349 }
4350 width = PyInt_AsLong(v);
4351 if (width < 0) {
4352 flags |= F_LJUST;
4353 width = -width;
4354 }
4355 if (--fmtcnt >= 0)
4356 c = *fmt++;
4357 }
4358 else if (c >= 0 && isdigit(c)) {
4359 width = c - '0';
4360 while (--fmtcnt >= 0) {
4361 c = Py_CHARMASK(*fmt++);
4362 if (!isdigit(c))
4363 break;
4364 if ((width*10) / 10 != width) {
4365 PyErr_SetString(
4366 PyExc_ValueError,
4367 "width too big");
4368 goto error;
4369 }
4370 width = width*10 + (c - '0');
4371 }
4372 }
4373 if (c == '.') {
4374 prec = 0;
4375 if (--fmtcnt >= 0)
4376 c = *fmt++;
4377 if (c == '*') {
4378 v = getnextarg(args, arglen, &argidx);
4379 if (v == NULL)
4380 goto error;
4381 if (!PyInt_Check(v)) {
4382 PyErr_SetString(
4383 PyExc_TypeError,
4384 "* wants int");
4385 goto error;
4386 }
4387 prec = PyInt_AsLong(v);
4388 if (prec < 0)
4389 prec = 0;
4390 if (--fmtcnt >= 0)
4391 c = *fmt++;
4392 }
4393 else if (c >= 0 && isdigit(c)) {
4394 prec = c - '0';
4395 while (--fmtcnt >= 0) {
4396 c = Py_CHARMASK(*fmt++);
4397 if (!isdigit(c))
4398 break;
4399 if ((prec*10) / 10 != prec) {
4400 PyErr_SetString(
4401 PyExc_ValueError,
4402 "prec too big");
4403 goto error;
4404 }
4405 prec = prec*10 + (c - '0');
4406 }
4407 }
4408 } /* prec */
4409 if (fmtcnt >= 0) {
4410 if (c == 'h' || c == 'l' || c == 'L') {
4411 if (--fmtcnt >= 0)
4412 c = *fmt++;
4413 }
4414 }
4415 if (fmtcnt < 0) {
4416 PyErr_SetString(PyExc_ValueError,
4417 "incomplete format");
4418 goto error;
4419 }
4420 if (c != '%') {
4421 v = getnextarg(args, arglen, &argidx);
4422 if (v == NULL)
4423 goto error;
4424 }
4425 sign = 0;
4426 fill = ' ';
4427 switch (c) {
4428 case '%':
4429 pbuf = "%";
4430 len = 1;
4431 break;
4432 case 's':
4433#ifdef Py_USING_UNICODE
4434 if (PyUnicode_Check(v)) {
4435 fmt = fmt_start;
4436 argidx = argidx_start;
4437 goto unicode;
4438 }
4439#endif
4440 temp = _PyObject_Str(v);
4441#ifdef Py_USING_UNICODE
4442 if (temp != NULL && PyUnicode_Check(temp)) {
4443 Py_DECREF(temp);
4444 fmt = fmt_start;
4445 argidx = argidx_start;
4446 goto unicode;
4447 }
4448#endif
4449 /* Fall through */
4450 case 'r':
4451 if (c == 'r')
4452 temp = PyObject_Repr(v);
4453 if (temp == NULL)
4454 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004455 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004456 PyErr_SetString(PyExc_TypeError,
4457 "%s argument has non-string str()");
4458 Py_DECREF(temp);
4459 goto error;
4460 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004461 pbuf = PyString_AS_STRING(temp);
4462 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004463 if (prec >= 0 && len > prec)
4464 len = prec;
4465 break;
4466 case 'i':
4467 case 'd':
4468 case 'u':
4469 case 'o':
4470 case 'x':
4471 case 'X':
4472 if (c == 'i')
4473 c = 'd';
4474 isnumok = 0;
4475 if (PyNumber_Check(v)) {
4476 PyObject *iobj=NULL;
4477
4478 if (PyInt_Check(v) || (PyLong_Check(v))) {
4479 iobj = v;
4480 Py_INCREF(iobj);
4481 }
4482 else {
4483 iobj = PyNumber_Int(v);
4484 if (iobj==NULL) iobj = PyNumber_Long(v);
4485 }
4486 if (iobj!=NULL) {
4487 if (PyInt_Check(iobj)) {
4488 isnumok = 1;
4489 pbuf = formatbuf;
4490 len = formatint(pbuf,
4491 sizeof(formatbuf),
4492 flags, prec, c, iobj);
4493 Py_DECREF(iobj);
4494 if (len < 0)
4495 goto error;
4496 sign = 1;
4497 }
4498 else if (PyLong_Check(iobj)) {
4499 int ilen;
4500
4501 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004502 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004503 prec, c, &pbuf, &ilen);
4504 Py_DECREF(iobj);
4505 len = ilen;
4506 if (!temp)
4507 goto error;
4508 sign = 1;
4509 }
4510 else {
4511 Py_DECREF(iobj);
4512 }
4513 }
4514 }
4515 if (!isnumok) {
4516 PyErr_Format(PyExc_TypeError,
4517 "%%%c format: a number is required, "
4518 "not %.200s", c, Py_TYPE(v)->tp_name);
4519 goto error;
4520 }
4521 if (flags & F_ZERO)
4522 fill = '0';
4523 break;
4524 case 'e':
4525 case 'E':
4526 case 'f':
4527 case 'F':
4528 case 'g':
4529 case 'G':
Mark Dickinson18cfada2009-11-23 18:46:41 +00004530 temp = formatfloat(v, flags, prec, c);
4531 if (temp == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00004532 goto error;
Mark Dickinson18cfada2009-11-23 18:46:41 +00004533 pbuf = PyString_AS_STRING(temp);
4534 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004535 sign = 1;
4536 if (flags & F_ZERO)
4537 fill = '0';
4538 break;
4539 case 'c':
4540#ifdef Py_USING_UNICODE
4541 if (PyUnicode_Check(v)) {
4542 fmt = fmt_start;
4543 argidx = argidx_start;
4544 goto unicode;
4545 }
4546#endif
4547 pbuf = formatbuf;
4548 len = formatchar(pbuf, sizeof(formatbuf), v);
4549 if (len < 0)
4550 goto error;
4551 break;
4552 default:
4553 PyErr_Format(PyExc_ValueError,
4554 "unsupported format character '%c' (0x%x) "
4555 "at index %zd",
4556 c, c,
4557 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004558 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00004559 goto error;
4560 }
4561 if (sign) {
4562 if (*pbuf == '-' || *pbuf == '+') {
4563 sign = *pbuf++;
4564 len--;
4565 }
4566 else if (flags & F_SIGN)
4567 sign = '+';
4568 else if (flags & F_BLANK)
4569 sign = ' ';
4570 else
4571 sign = 0;
4572 }
4573 if (width < len)
4574 width = len;
4575 if (rescnt - (sign != 0) < width) {
4576 reslen -= rescnt;
4577 rescnt = width + fmtcnt + 100;
4578 reslen += rescnt;
4579 if (reslen < 0) {
4580 Py_DECREF(result);
4581 Py_XDECREF(temp);
4582 return PyErr_NoMemory();
4583 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004584 if (_PyString_Resize(&result, reslen) < 0) {
Christian Heimes44720832008-05-26 13:01:01 +00004585 Py_XDECREF(temp);
4586 return NULL;
4587 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004588 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004589 + reslen - rescnt;
4590 }
4591 if (sign) {
4592 if (fill != ' ')
4593 *res++ = sign;
4594 rescnt--;
4595 if (width > len)
4596 width--;
4597 }
4598 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4599 assert(pbuf[0] == '0');
4600 assert(pbuf[1] == c);
4601 if (fill != ' ') {
4602 *res++ = *pbuf++;
4603 *res++ = *pbuf++;
4604 }
4605 rescnt -= 2;
4606 width -= 2;
4607 if (width < 0)
4608 width = 0;
4609 len -= 2;
4610 }
4611 if (width > len && !(flags & F_LJUST)) {
4612 do {
4613 --rescnt;
4614 *res++ = fill;
4615 } while (--width > len);
4616 }
4617 if (fill == ' ') {
4618 if (sign)
4619 *res++ = sign;
4620 if ((flags & F_ALT) &&
4621 (c == 'x' || c == 'X')) {
4622 assert(pbuf[0] == '0');
4623 assert(pbuf[1] == c);
4624 *res++ = *pbuf++;
4625 *res++ = *pbuf++;
4626 }
4627 }
4628 Py_MEMCPY(res, pbuf, len);
4629 res += len;
4630 rescnt -= len;
4631 while (--width >= len) {
4632 --rescnt;
4633 *res++ = ' ';
4634 }
4635 if (dict && (argidx < arglen) && c != '%') {
4636 PyErr_SetString(PyExc_TypeError,
4637 "not all arguments converted during string formatting");
4638 Py_XDECREF(temp);
4639 goto error;
4640 }
4641 Py_XDECREF(temp);
4642 } /* '%' */
4643 } /* until end */
4644 if (argidx < arglen && !dict) {
4645 PyErr_SetString(PyExc_TypeError,
4646 "not all arguments converted during string formatting");
4647 goto error;
4648 }
4649 if (args_owned) {
4650 Py_DECREF(args);
4651 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004652 _PyString_Resize(&result, reslen - rescnt);
Christian Heimes44720832008-05-26 13:01:01 +00004653 return result;
4654
4655#ifdef Py_USING_UNICODE
4656 unicode:
4657 if (args_owned) {
4658 Py_DECREF(args);
4659 args_owned = 0;
4660 }
4661 /* Fiddle args right (remove the first argidx arguments) */
4662 if (PyTuple_Check(orig_args) && argidx > 0) {
4663 PyObject *v;
4664 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4665 v = PyTuple_New(n);
4666 if (v == NULL)
4667 goto error;
4668 while (--n >= 0) {
4669 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4670 Py_INCREF(w);
4671 PyTuple_SET_ITEM(v, n, w);
4672 }
4673 args = v;
4674 } else {
4675 Py_INCREF(orig_args);
4676 args = orig_args;
4677 }
4678 args_owned = 1;
4679 /* Take what we have of the result and let the Unicode formatting
4680 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004681 rescnt = res - PyString_AS_STRING(result);
4682 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00004683 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004684 fmtcnt = PyString_GET_SIZE(format) - \
4685 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00004686 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4687 if (format == NULL)
4688 goto error;
4689 v = PyUnicode_Format(format, args);
4690 Py_DECREF(format);
4691 if (v == NULL)
4692 goto error;
4693 /* Paste what we have (result) to what the Unicode formatting
4694 function returned (v) and return the result (or error) */
4695 w = PyUnicode_Concat(result, v);
4696 Py_DECREF(result);
4697 Py_DECREF(v);
4698 Py_DECREF(args);
4699 return w;
4700#endif /* Py_USING_UNICODE */
4701
4702 error:
4703 Py_DECREF(result);
4704 if (args_owned) {
4705 Py_DECREF(args);
4706 }
4707 return NULL;
4708}
4709
4710void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004711PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004712{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004713 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00004714 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004715 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004716 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00004717 /* If it's a string subclass, we don't really know what putting
4718 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004719 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00004720 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004721 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00004722 return;
4723 if (interned == NULL) {
4724 interned = PyDict_New();
4725 if (interned == NULL) {
4726 PyErr_Clear(); /* Don't leave an exception */
4727 return;
4728 }
4729 }
4730 t = PyDict_GetItem(interned, (PyObject *)s);
4731 if (t) {
4732 Py_INCREF(t);
4733 Py_DECREF(*p);
4734 *p = t;
4735 return;
4736 }
4737
4738 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4739 PyErr_Clear();
4740 return;
4741 }
4742 /* The two references in interned are not counted by refcnt.
4743 The string deallocator will take care of this */
4744 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004745 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004746}
4747
4748void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004749PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004750{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004751 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004752 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4753 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004754 Py_INCREF(*p);
4755 }
4756}
4757
4758
4759PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004760PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004761{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004762 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00004763 if (s == NULL)
4764 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004765 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00004766 return s;
4767}
4768
4769void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004770PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004771{
4772 int i;
4773 for (i = 0; i < UCHAR_MAX + 1; i++) {
4774 Py_XDECREF(characters[i]);
4775 characters[i] = NULL;
4776 }
4777 Py_XDECREF(nullstring);
4778 nullstring = NULL;
4779}
4780
4781void _Py_ReleaseInternedStrings(void)
4782{
4783 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004784 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00004785 Py_ssize_t i, n;
4786 Py_ssize_t immortal_size = 0, mortal_size = 0;
4787
4788 if (interned == NULL || !PyDict_Check(interned))
4789 return;
4790 keys = PyDict_Keys(interned);
4791 if (keys == NULL || !PyList_Check(keys)) {
4792 PyErr_Clear();
4793 return;
4794 }
4795
4796 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4797 detector, interned strings are not forcibly deallocated; rather, we
4798 give them their stolen references back, and then clear and DECREF
4799 the interned dict. */
4800
4801 n = PyList_GET_SIZE(keys);
4802 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4803 n);
4804 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004805 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00004806 switch (s->ob_sstate) {
4807 case SSTATE_NOT_INTERNED:
4808 /* XXX Shouldn't happen */
4809 break;
4810 case SSTATE_INTERNED_IMMORTAL:
4811 Py_REFCNT(s) += 1;
4812 immortal_size += Py_SIZE(s);
4813 break;
4814 case SSTATE_INTERNED_MORTAL:
4815 Py_REFCNT(s) += 2;
4816 mortal_size += Py_SIZE(s);
4817 break;
4818 default:
4819 Py_FatalError("Inconsistent interned string state.");
4820 }
4821 s->ob_sstate = SSTATE_NOT_INTERNED;
4822 }
4823 fprintf(stderr, "total size of all interned strings: "
4824 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4825 "mortal/immortal\n", mortal_size, immortal_size);
4826 Py_DECREF(keys);
4827 PyDict_Clear(interned);
4828 Py_DECREF(interned);
4829 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004830}