blob: 982679258b64e16074c998a664bd287758642278 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
/* PyStringObject_SIZE is the basic size of a string object: an allocation
   for a string holding n bytes of data must request
   PyStringObject_SIZE + n bytes.  The "+ 1" covers the terminating null
   byte that is stored after the data.

   Using this instead of sizeof(PyStringObject) saves 3 bytes per string
   allocation on a typical system.
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
/*
   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000624#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000635
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
Christian Heimes44720832008-05-26 13:01:01 +0000653#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000654 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000655#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
Christian Heimes44720832008-05-26 13:01:01 +0000729#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000730 case 'u':
731 case 'U':
732 case 'N':
733 if (unicode) {
734 PyErr_SetString(PyExc_ValueError,
735 "Unicode escapes not legal "
736 "when Unicode disabled");
737 goto failed;
738 }
Christian Heimes44720832008-05-26 13:01:01 +0000739#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000740 default:
741 *p++ = '\\';
742 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200743 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 UTF-8 bytes may follow. */
745 }
746 }
747 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
748 goto failed;
749 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000750 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000751 Py_DECREF(v);
752 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000753}
754
755/* -------------------------------------------------------------------- */
756/* object api */
757
Christian Heimes1a6387e2008-03-26 12:49:49 +0000758static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000759string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000760{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000761 char *s;
762 Py_ssize_t len;
763 if (PyString_AsStringAndSize(op, &s, &len))
764 return -1;
765 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000766}
767
Christian Heimes44720832008-05-26 13:01:01 +0000768static /*const*/ char *
769string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000771 char *s;
772 Py_ssize_t len;
773 if (PyString_AsStringAndSize(op, &s, &len))
774 return NULL;
775 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000776}
777
778Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000779PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000781 if (!PyString_Check(op))
782 return string_getsize(op);
783 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784}
785
Christian Heimes44720832008-05-26 13:01:01 +0000786/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000787PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000789 if (!PyString_Check(op))
790 return string_getbuffer(op);
791 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792}
793
794int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000796 register char **s,
797 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000798{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000799 if (s == NULL) {
800 PyErr_BadInternalCall();
801 return -1;
802 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000803
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000804 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000805#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 if (PyUnicode_Check(obj)) {
807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
808 if (obj == NULL)
809 return -1;
810 }
811 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000812#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000813 {
814 PyErr_Format(PyExc_TypeError,
815 "expected string or Unicode object, "
816 "%.200s found", Py_TYPE(obj)->tp_name);
817 return -1;
818 }
819 }
Christian Heimes44720832008-05-26 13:01:01 +0000820
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000821 *s = PyString_AS_STRING(obj);
822 if (len != NULL)
823 *len = PyString_GET_SIZE(obj);
824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
825 PyErr_SetString(PyExc_TypeError,
826 "expected string without null bytes");
827 return -1;
828 }
829 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000830}
831
Christian Heimes1a6387e2008-03-26 12:49:49 +0000832/* -------------------------------------------------------------------- */
833/* Methods */
834
Christian Heimes44720832008-05-26 13:01:01 +0000835#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000837
Christian Heimes1a6387e2008-03-26 12:49:49 +0000838#include "stringlib/count.h"
839#include "stringlib/find.h"
840#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000841#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000843#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000844#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000845
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
847
848static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000849string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000851 Py_ssize_t i, str_len;
852 char c;
853 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 /* XXX Ought to check for interrupts when writing long strings */
856 if (! PyString_CheckExact(op)) {
857 int ret;
858 /* A str subclass may have its own __str__ method. */
859 op = (PyStringObject *) PyObject_Str((PyObject *)op);
860 if (op == NULL)
861 return -1;
862 ret = string_print(op, fp, flags);
863 Py_DECREF(op);
864 return ret;
865 }
866 if (flags & Py_PRINT_RAW) {
867 char *data = op->ob_sval;
868 Py_ssize_t size = Py_SIZE(op);
869 Py_BEGIN_ALLOW_THREADS
870 while (size > INT_MAX) {
871 /* Very long strings cannot be written atomically.
872 * But don't write exactly INT_MAX bytes at a time
873 * to avoid memory aligment issues.
874 */
875 const int chunk_size = INT_MAX & ~0x3FFF;
876 fwrite(data, 1, chunk_size, fp);
877 data += chunk_size;
878 size -= chunk_size;
879 }
Christian Heimes44720832008-05-26 13:01:01 +0000880#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000881 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000882#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000883 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000884#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000885 Py_END_ALLOW_THREADS
886 return 0;
887 }
Christian Heimes44720832008-05-26 13:01:01 +0000888
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 /* figure out which quote to use; single is preferred */
890 quote = '\'';
891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000894
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000895 str_len = Py_SIZE(op);
896 Py_BEGIN_ALLOW_THREADS
897 fputc(quote, fp);
898 for (i = 0; i < str_len; i++) {
899 /* Since strings are immutable and the caller should have a
900 reference, accessing the interal buffer should not be an issue
901 with the GIL released. */
902 c = op->ob_sval[i];
903 if (c == quote || c == '\\')
904 fprintf(fp, "\\%c", c);
905 else if (c == '\t')
906 fprintf(fp, "\\t");
907 else if (c == '\n')
908 fprintf(fp, "\\n");
909 else if (c == '\r')
910 fprintf(fp, "\\r");
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
913 else
914 fputc(c, fp);
915 }
916 fputc(quote, fp);
917 Py_END_ALLOW_THREADS
918 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000919}
920
Christian Heimes44720832008-05-26 13:01:01 +0000921PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000922PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000924 register PyStringObject* op = (PyStringObject*) obj;
925 size_t newsize = 2 + 4 * Py_SIZE(op);
926 PyObject *v;
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
928 PyErr_SetString(PyExc_OverflowError,
929 "string is too large to make repr");
930 return NULL;
931 }
932 v = PyString_FromStringAndSize((char *)NULL, newsize);
933 if (v == NULL) {
934 return NULL;
935 }
936 else {
937 register Py_ssize_t i;
938 register char c;
939 register char *p;
940 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000941
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000942 /* figure out which quote to use; single is preferred */
943 quote = '\'';
944 if (smartquotes &&
945 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
946 !memchr(op->ob_sval, '"', Py_SIZE(op)))
947 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000948
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000949 p = PyString_AS_STRING(v);
950 *p++ = quote;
951 for (i = 0; i < Py_SIZE(op); i++) {
952 /* There's at least enough room for a hex escape
953 and a closing quote. */
954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
955 c = op->ob_sval[i];
956 if (c == quote || c == '\\')
957 *p++ = '\\', *p++ = c;
958 else if (c == '\t')
959 *p++ = '\\', *p++ = 't';
960 else if (c == '\n')
961 *p++ = '\\', *p++ = 'n';
962 else if (c == '\r')
963 *p++ = '\\', *p++ = 'r';
964 else if (c < ' ' || c >= 0x7f) {
965 /* For performance, we don't want to call
966 PyOS_snprintf here (extra layers of
967 function call). */
968 sprintf(p, "\\x%02x", c & 0xff);
969 p += 4;
970 }
971 else
972 *p++ = c;
973 }
974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
975 *p++ = quote;
976 *p = '\0';
977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
978 return NULL;
979 return v;
980 }
Christian Heimes44720832008-05-26 13:01:01 +0000981}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000982
983static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000984string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000985{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000986 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000987}
988
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000990string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000992 assert(PyString_Check(s));
993 if (PyString_CheckExact(s)) {
994 Py_INCREF(s);
995 return s;
996 }
997 else {
998 /* Subtype -- return genuine string with the same value. */
999 PyStringObject *t = (PyStringObject *) s;
1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1001 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001002}
1003
Christian Heimes44720832008-05-26 13:01:01 +00001004static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001005string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001006{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001007 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001008}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001009
Christian Heimes44720832008-05-26 13:01:01 +00001010static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001011string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001012{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 register Py_ssize_t size;
1014 register PyStringObject *op;
1015 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001016#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 if (PyUnicode_Check(bb))
1018 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001019#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001020 if (PyByteArray_Check(bb))
1021 return PyByteArray_Concat((PyObject *)a, bb);
1022 PyErr_Format(PyExc_TypeError,
1023 "cannot concatenate 'str' and '%.200s' objects",
1024 Py_TYPE(bb)->tp_name);
1025 return NULL;
1026 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001027#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001028 /* Optimize cases with empty left or right operand */
1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1030 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1031 if (Py_SIZE(a) == 0) {
1032 Py_INCREF(bb);
1033 return bb;
1034 }
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 size = Py_SIZE(a) + Py_SIZE(b);
1039 /* Check that string sizes are not negative, to prevent an
1040 overflow in cases where we are passed incorrectly-created
1041 strings with negative lengths (due to a bug in other code).
1042 */
1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1045 PyErr_SetString(PyExc_OverflowError,
1046 "strings are too large to concat");
1047 return NULL;
1048 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001049
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001050 /* Inline PyObject_NewVar */
1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1052 PyErr_SetString(PyExc_OverflowError,
1053 "strings are too large to concat");
1054 return NULL;
1055 }
1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1057 if (op == NULL)
1058 return PyErr_NoMemory();
1059 PyObject_INIT_VAR(op, &PyString_Type, size);
1060 op->ob_shash = -1;
1061 op->ob_sstate = SSTATE_NOT_INTERNED;
1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1064 op->ob_sval[size] = '\0';
1065 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001066#undef b
1067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001070string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001071{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001072 register Py_ssize_t i;
1073 register Py_ssize_t j;
1074 register Py_ssize_t size;
1075 register PyStringObject *op;
1076 size_t nbytes;
1077 if (n < 0)
1078 n = 0;
1079 /* watch out for overflows: the size can overflow int,
1080 * and the # of bytes needed can overflow size_t
1081 */
1082 size = Py_SIZE(a) * n;
1083 if (n && size / n != Py_SIZE(a)) {
1084 PyErr_SetString(PyExc_OverflowError,
1085 "repeated string is too long");
1086 return NULL;
1087 }
1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1089 Py_INCREF(a);
1090 return (PyObject *)a;
1091 }
1092 nbytes = (size_t)size;
1093 if (nbytes + PyStringObject_SIZE <= nbytes) {
1094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1096 return NULL;
1097 }
1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1099 if (op == NULL)
1100 return PyErr_NoMemory();
1101 PyObject_INIT_VAR(op, &PyString_Type, size);
1102 op->ob_shash = -1;
1103 op->ob_sstate = SSTATE_NOT_INTERNED;
1104 op->ob_sval[size] = '\0';
1105 if (Py_SIZE(a) == 1 && n > 0) {
1106 memset(op->ob_sval, a->ob_sval[0] , n);
1107 return (PyObject *) op;
1108 }
1109 i = 0;
1110 if (i < size) {
1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1112 i = Py_SIZE(a);
1113 }
1114 while (i < size) {
1115 j = (i <= size-i) ? i : size-i;
1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1117 i += j;
1118 }
1119 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001120}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001121
Christian Heimes44720832008-05-26 13:01:01 +00001122/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1123
1124static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001125string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001126 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001127 /* j -- may be negative! */
1128{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001129 if (i < 0)
1130 i = 0;
1131 if (j < 0)
1132 j = 0; /* Avoid signed/unsigned bug in next line */
1133 if (j > Py_SIZE(a))
1134 j = Py_SIZE(a);
1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1136 /* It's the same as a */
1137 Py_INCREF(a);
1138 return (PyObject *)a;
1139 }
1140 if (j < i)
1141 j = i;
1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001143}
1144
1145static int
1146string_contains(PyObject *str_obj, PyObject *sub_obj)
1147{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001148 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001149#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 if (PyUnicode_Check(sub_obj))
1151 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001152#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001153 if (!PyString_Check(sub_obj)) {
1154 PyErr_Format(PyExc_TypeError,
1155 "'in <string>' requires string as left operand, "
1156 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1157 return -1;
1158 }
1159 }
Christian Heimes44720832008-05-26 13:01:01 +00001160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001161 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001162}
1163
1164static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001165string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001167 char pchar;
1168 PyObject *v;
1169 if (i < 0 || i >= Py_SIZE(a)) {
1170 PyErr_SetString(PyExc_IndexError, "string index out of range");
1171 return NULL;
1172 }
1173 pchar = a->ob_sval[i];
1174 v = (PyObject *)characters[pchar & UCHAR_MAX];
1175 if (v == NULL)
1176 v = PyString_FromStringAndSize(&pchar, 1);
1177 else {
Christian Heimes44720832008-05-26 13:01:01 +00001178#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001179 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001180#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 Py_INCREF(v);
1182 }
1183 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001184}
1185
1186static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001187string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001188{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001189 int c;
1190 Py_ssize_t len_a, len_b;
1191 Py_ssize_t min_len;
1192 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001193
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001194 /* Make sure both arguments are strings. */
1195 if (!(PyString_Check(a) && PyString_Check(b))) {
1196 result = Py_NotImplemented;
1197 goto out;
1198 }
1199 if (a == b) {
1200 switch (op) {
1201 case Py_EQ:case Py_LE:case Py_GE:
1202 result = Py_True;
1203 goto out;
1204 case Py_NE:case Py_LT:case Py_GT:
1205 result = Py_False;
1206 goto out;
1207 }
1208 }
1209 if (op == Py_EQ) {
1210 /* Supporting Py_NE here as well does not save
1211 much time, since Py_NE is rarely used. */
1212 if (Py_SIZE(a) == Py_SIZE(b)
1213 && (a->ob_sval[0] == b->ob_sval[0]
1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1215 result = Py_True;
1216 } else {
1217 result = Py_False;
1218 }
1219 goto out;
1220 }
1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1222 min_len = (len_a < len_b) ? len_a : len_b;
1223 if (min_len > 0) {
1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1225 if (c==0)
1226 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1227 } else
1228 c = 0;
1229 if (c == 0)
1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1231 switch (op) {
1232 case Py_LT: c = c < 0; break;
1233 case Py_LE: c = c <= 0; break;
1234 case Py_EQ: assert(0); break; /* unreachable */
1235 case Py_NE: c = c != 0; break;
1236 case Py_GT: c = c > 0; break;
1237 case Py_GE: c = c >= 0; break;
1238 default:
1239 result = Py_NotImplemented;
1240 goto out;
1241 }
1242 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001243 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001244 Py_INCREF(result);
1245 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001246}
1247
1248int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001249_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001250{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001251 PyStringObject *a = (PyStringObject*) o1;
1252 PyStringObject *b = (PyStringObject*) o2;
1253 return Py_SIZE(a) == Py_SIZE(b)
1254 && *a->ob_sval == *b->ob_sval
1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001256}
1257
1258static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001259string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001260{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001261 register Py_ssize_t len;
1262 register unsigned char *p;
1263 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001264
Benjamin Peterson26da9202012-02-21 11:08:50 -05001265 assert(_Py_HashSecret_Initialized);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001266 if (a->ob_shash != -1)
1267 return a->ob_shash;
1268 len = Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001269 /*
1270 We make the hash of the empty string be 0, rather than using
1271 (prefix ^ suffix), since this slightly obfuscates the hash secret
1272 */
1273 if (len == 0) {
1274 a->ob_shash = 0;
1275 return 0;
1276 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001277 p = (unsigned char *) a->ob_sval;
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001278 x = _Py_HashSecret.prefix;
1279 x ^= *p << 7;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001280 while (--len >= 0)
1281 x = (1000003*x) ^ *p++;
1282 x ^= Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001283 x ^= _Py_HashSecret.suffix;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001284 if (x == -1)
1285 x = -2;
1286 a->ob_shash = x;
1287 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001288}
1289
1290static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001291string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001292{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001293 if (PyIndex_Check(item)) {
1294 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1295 if (i == -1 && PyErr_Occurred())
1296 return NULL;
1297 if (i < 0)
1298 i += PyString_GET_SIZE(self);
1299 return string_item(self, i);
1300 }
1301 else if (PySlice_Check(item)) {
1302 Py_ssize_t start, stop, step, slicelength, cur, i;
1303 char* source_buf;
1304 char* result_buf;
1305 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001306
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001307 if (PySlice_GetIndicesEx((PySliceObject*)item,
1308 PyString_GET_SIZE(self),
1309 &start, &stop, &step, &slicelength) < 0) {
1310 return NULL;
1311 }
Christian Heimes44720832008-05-26 13:01:01 +00001312
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001313 if (slicelength <= 0) {
1314 return PyString_FromStringAndSize("", 0);
1315 }
1316 else if (start == 0 && step == 1 &&
1317 slicelength == PyString_GET_SIZE(self) &&
1318 PyString_CheckExact(self)) {
1319 Py_INCREF(self);
1320 return (PyObject *)self;
1321 }
1322 else if (step == 1) {
1323 return PyString_FromStringAndSize(
1324 PyString_AS_STRING(self) + start,
1325 slicelength);
1326 }
1327 else {
1328 source_buf = PyString_AsString((PyObject*)self);
1329 result_buf = (char *)PyMem_Malloc(slicelength);
1330 if (result_buf == NULL)
1331 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001332
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001333 for (cur = start, i = 0; i < slicelength;
1334 cur += step, i++) {
1335 result_buf[i] = source_buf[cur];
1336 }
Christian Heimes44720832008-05-26 13:01:01 +00001337
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001338 result = PyString_FromStringAndSize(result_buf,
1339 slicelength);
1340 PyMem_Free(result_buf);
1341 return result;
1342 }
1343 }
1344 else {
1345 PyErr_Format(PyExc_TypeError,
1346 "string indices must be integers, not %.200s",
1347 Py_TYPE(item)->tp_name);
1348 return NULL;
1349 }
Christian Heimes44720832008-05-26 13:01:01 +00001350}
1351
1352static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001353string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001354{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001355 if ( index != 0 ) {
1356 PyErr_SetString(PyExc_SystemError,
1357 "accessing non-existent string segment");
1358 return -1;
1359 }
1360 *ptr = (void *)self->ob_sval;
1361 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001362}
1363
1364static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001365string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001366{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001367 PyErr_SetString(PyExc_TypeError,
1368 "Cannot use string as modifiable buffer");
1369 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001370}
1371
1372static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001373string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001374{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001375 if ( lenp )
1376 *lenp = Py_SIZE(self);
1377 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001378}
1379
1380static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001381string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001382{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001383 if ( index != 0 ) {
1384 PyErr_SetString(PyExc_SystemError,
1385 "accessing non-existent string segment");
1386 return -1;
1387 }
1388 *ptr = self->ob_sval;
1389 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001390}
1391
1392static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001393string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001394{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001395 return PyBuffer_FillInfo(view, (PyObject*)self,
1396 (void *)self->ob_sval, Py_SIZE(self),
1397 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001398}
1399
1400static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001401 (lenfunc)string_length, /*sq_length*/
1402 (binaryfunc)string_concat, /*sq_concat*/
1403 (ssizeargfunc)string_repeat, /*sq_repeat*/
1404 (ssizeargfunc)string_item, /*sq_item*/
1405 (ssizessizeargfunc)string_slice, /*sq_slice*/
1406 0, /*sq_ass_item*/
1407 0, /*sq_ass_slice*/
1408 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001409};
1410
1411static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001412 (lenfunc)string_length,
1413 (binaryfunc)string_subscript,
1414 0,
Christian Heimes44720832008-05-26 13:01:01 +00001415};
1416
1417static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001418 (readbufferproc)string_buffer_getreadbuf,
1419 (writebufferproc)string_buffer_getwritebuf,
1420 (segcountproc)string_buffer_getsegcount,
1421 (charbufferproc)string_buffer_getcharbuf,
1422 (getbufferproc)string_buffer_getbuffer,
1423 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001424};
1425
1426
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001427
/* Selectors naming the three strip variants; they double as indices into
   stripformat[] below.  (Presumably consumed by the strip methods defined
   later in the file -- not visible from here.) */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Bare method name for selector i: the stripformat entry with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1436
Christian Heimes1a6387e2008-03-26 12:49:49 +00001437PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001438"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001439\n\
Christian Heimes44720832008-05-26 13:01:01 +00001440Return a list of the words in the string S, using sep as the\n\
1441delimiter string. If maxsplit is given, at most maxsplit\n\
1442splits are done. If sep is not specified or is None, any\n\
1443whitespace string is a separator and empty strings are removed\n\
1444from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001445
1446static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001447string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001448{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001449 Py_ssize_t len = PyString_GET_SIZE(self), n;
1450 Py_ssize_t maxsplit = -1;
1451 const char *s = PyString_AS_STRING(self), *sub;
1452 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001453
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001454 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1455 return NULL;
1456 if (maxsplit < 0)
1457 maxsplit = PY_SSIZE_T_MAX;
1458 if (subobj == Py_None)
1459 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1460 if (PyString_Check(subobj)) {
1461 sub = PyString_AS_STRING(subobj);
1462 n = PyString_GET_SIZE(subobj);
1463 }
Christian Heimes44720832008-05-26 13:01:01 +00001464#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001465 else if (PyUnicode_Check(subobj))
1466 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001467#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001468 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1469 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001470
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001471 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001472}
1473
1474PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001475"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001476\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001477Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001478the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001479found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001480
1481static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001482string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001483{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001484 const char *sep;
1485 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001486
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001487 if (PyString_Check(sep_obj)) {
1488 sep = PyString_AS_STRING(sep_obj);
1489 sep_len = PyString_GET_SIZE(sep_obj);
1490 }
Christian Heimes44720832008-05-26 13:01:01 +00001491#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001492 else if (PyUnicode_Check(sep_obj))
1493 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001494#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001495 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1496 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001497
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001498 return stringlib_partition(
1499 (PyObject*) self,
1500 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1501 sep_obj, sep, sep_len
1502 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001503}
1504
1505PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001506"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001507\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001508Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001509the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001510separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001511
1512static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001513string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001514{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001515 const char *sep;
1516 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001517
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001518 if (PyString_Check(sep_obj)) {
1519 sep = PyString_AS_STRING(sep_obj);
1520 sep_len = PyString_GET_SIZE(sep_obj);
1521 }
Christian Heimes44720832008-05-26 13:01:01 +00001522#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001523 else if (PyUnicode_Check(sep_obj))
1524 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001525#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001526 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1527 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001528
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001529 return stringlib_rpartition(
1530 (PyObject*) self,
1531 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1532 sep_obj, sep, sep_len
1533 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001534}
1535
Christian Heimes1a6387e2008-03-26 12:49:49 +00001536PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001537"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001538\n\
Christian Heimes44720832008-05-26 13:01:01 +00001539Return a list of the words in the string S, using sep as the\n\
1540delimiter string, starting at the end of the string and working\n\
1541to the front. If maxsplit is given, at most maxsplit splits are\n\
1542done. If sep is not specified or is None, any whitespace string\n\
1543is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001544
1545static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001546string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001547{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001548 Py_ssize_t len = PyString_GET_SIZE(self), n;
1549 Py_ssize_t maxsplit = -1;
1550 const char *s = PyString_AS_STRING(self), *sub;
1551 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001552
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001553 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1554 return NULL;
1555 if (maxsplit < 0)
1556 maxsplit = PY_SSIZE_T_MAX;
1557 if (subobj == Py_None)
1558 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1559 if (PyString_Check(subobj)) {
1560 sub = PyString_AS_STRING(subobj);
1561 n = PyString_GET_SIZE(subobj);
1562 }
Christian Heimes44720832008-05-26 13:01:01 +00001563#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001564 else if (PyUnicode_Check(subobj))
1565 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001566#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001567 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1568 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001569
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001570 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001571}
1572
1573
1574PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001575"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001576\n\
1577Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001578iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001579
1580static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001581string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001582{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001583 char *sep = PyString_AS_STRING(self);
1584 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1585 PyObject *res = NULL;
1586 char *p;
1587 Py_ssize_t seqlen = 0;
1588 size_t sz = 0;
1589 Py_ssize_t i;
1590 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001591
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001592 seq = PySequence_Fast(orig, "");
1593 if (seq == NULL) {
1594 return NULL;
1595 }
Christian Heimes44720832008-05-26 13:01:01 +00001596
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001597 seqlen = PySequence_Size(seq);
1598 if (seqlen == 0) {
1599 Py_DECREF(seq);
1600 return PyString_FromString("");
1601 }
1602 if (seqlen == 1) {
1603 item = PySequence_Fast_GET_ITEM(seq, 0);
1604 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1605 Py_INCREF(item);
1606 Py_DECREF(seq);
1607 return item;
1608 }
1609 }
Christian Heimes44720832008-05-26 13:01:01 +00001610
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001611 /* There are at least two things to join, or else we have a subclass
1612 * of the builtin types in the sequence.
1613 * Do a pre-pass to figure out the total amount of space we'll
1614 * need (sz), see whether any argument is absurd, and defer to
1615 * the Unicode join if appropriate.
1616 */
1617 for (i = 0; i < seqlen; i++) {
1618 const size_t old_sz = sz;
1619 item = PySequence_Fast_GET_ITEM(seq, i);
1620 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001621#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001622 if (PyUnicode_Check(item)) {
1623 /* Defer to Unicode join.
1624 * CAUTION: There's no gurantee that the
1625 * original sequence can be iterated over
1626 * again, so we must pass seq here.
1627 */
1628 PyObject *result;
1629 result = PyUnicode_Join((PyObject *)self, seq);
1630 Py_DECREF(seq);
1631 return result;
1632 }
Christian Heimes44720832008-05-26 13:01:01 +00001633#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001634 PyErr_Format(PyExc_TypeError,
1635 "sequence item %zd: expected string,"
1636 " %.80s found",
1637 i, Py_TYPE(item)->tp_name);
1638 Py_DECREF(seq);
1639 return NULL;
1640 }
1641 sz += PyString_GET_SIZE(item);
1642 if (i != 0)
1643 sz += seplen;
1644 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1645 PyErr_SetString(PyExc_OverflowError,
1646 "join() result is too long for a Python string");
1647 Py_DECREF(seq);
1648 return NULL;
1649 }
1650 }
Christian Heimes44720832008-05-26 13:01:01 +00001651
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001652 /* Allocate result space. */
1653 res = PyString_FromStringAndSize((char*)NULL, sz);
1654 if (res == NULL) {
1655 Py_DECREF(seq);
1656 return NULL;
1657 }
Christian Heimes44720832008-05-26 13:01:01 +00001658
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001659 /* Catenate everything. */
1660 p = PyString_AS_STRING(res);
1661 for (i = 0; i < seqlen; ++i) {
1662 size_t n;
1663 item = PySequence_Fast_GET_ITEM(seq, i);
1664 n = PyString_GET_SIZE(item);
1665 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1666 p += n;
1667 if (i < seqlen - 1) {
1668 Py_MEMCPY(p, sep, seplen);
1669 p += seplen;
1670 }
1671 }
Christian Heimes44720832008-05-26 13:01:01 +00001672
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001673 Py_DECREF(seq);
1674 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001675}
1676
1677PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001678_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001679{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001680 assert(sep != NULL && PyString_Check(sep));
1681 assert(x != NULL);
1682 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001683}
1684
/* Helper macro to fix up start/end slice values in place.
 *
 *   end   > len  -> clamped to len
 *   end   < 0    -> len is added; if still negative, clamped to 0
 *   start < 0    -> len is added; if still negative, clamped to 0
 *
 * start is NOT clamped to len, so start may still exceed end afterwards;
 * callers must cope with an empty/negative span.
 *
 * start and end must be modifiable lvalues, and every argument may be
 * evaluated more than once, so pass side-effect-free expressions.
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon is exactly one statement, even as the body of an unbraced
 * if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001699
1700Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001701string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001702{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001703 PyObject *subobj;
1704 const char *sub;
1705 Py_ssize_t sub_len;
1706 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00001707
Jesus Cea44e81682011-04-20 16:39:15 +02001708 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1709 args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001710 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001711
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001712 if (PyString_Check(subobj)) {
1713 sub = PyString_AS_STRING(subobj);
1714 sub_len = PyString_GET_SIZE(subobj);
1715 }
Christian Heimes44720832008-05-26 13:01:01 +00001716#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001717 else if (PyUnicode_Check(subobj))
1718 return PyUnicode_Find(
1719 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001720#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001721 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1722 /* XXX - the "expected a character buffer object" is pretty
1723 confusing for a non-expert. remap to something else ? */
1724 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001725
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001726 if (dir > 0)
1727 return stringlib_find_slice(
1728 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1729 sub, sub_len, start, end);
1730 else
1731 return stringlib_rfind_slice(
1732 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1733 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001734}
1735
1736
1737PyDoc_STRVAR(find__doc__,
1738"S.find(sub [,start [,end]]) -> int\n\
1739\n\
1740Return the lowest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001741such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001742arguments start and end are interpreted as in slice notation.\n\
1743\n\
1744Return -1 on failure.");
1745
1746static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001747string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001748{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001749 Py_ssize_t result = string_find_internal(self, args, +1);
1750 if (result == -2)
1751 return NULL;
1752 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001753}
1754
1755
1756PyDoc_STRVAR(index__doc__,
1757"S.index(sub [,start [,end]]) -> int\n\
1758\n\
1759Like S.find() but raise ValueError when the substring is not found.");
1760
1761static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001762string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001763{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001764 Py_ssize_t result = string_find_internal(self, args, +1);
1765 if (result == -2)
1766 return NULL;
1767 if (result == -1) {
1768 PyErr_SetString(PyExc_ValueError,
1769 "substring not found");
1770 return NULL;
1771 }
1772 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001773}
1774
1775
1776PyDoc_STRVAR(rfind__doc__,
1777"S.rfind(sub [,start [,end]]) -> int\n\
1778\n\
1779Return the highest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001780such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001781arguments start and end are interpreted as in slice notation.\n\
1782\n\
1783Return -1 on failure.");
1784
1785static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001786string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001787{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001788 Py_ssize_t result = string_find_internal(self, args, -1);
1789 if (result == -2)
1790 return NULL;
1791 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001792}
1793
1794
1795PyDoc_STRVAR(rindex__doc__,
1796"S.rindex(sub [,start [,end]]) -> int\n\
1797\n\
1798Like S.rfind() but raise ValueError when the substring is not found.");
1799
1800static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001801string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001802{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001803 Py_ssize_t result = string_find_internal(self, args, -1);
1804 if (result == -2)
1805 return NULL;
1806 if (result == -1) {
1807 PyErr_SetString(PyExc_ValueError,
1808 "substring not found");
1809 return NULL;
1810 }
1811 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001812}
1813
1814
1815Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001816do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001817{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001818 char *s = PyString_AS_STRING(self);
1819 Py_ssize_t len = PyString_GET_SIZE(self);
1820 char *sep = PyString_AS_STRING(sepobj);
1821 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1822 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001823
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001824 i = 0;
1825 if (striptype != RIGHTSTRIP) {
1826 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1827 i++;
1828 }
1829 }
Christian Heimes44720832008-05-26 13:01:01 +00001830
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001831 j = len;
1832 if (striptype != LEFTSTRIP) {
1833 do {
1834 j--;
1835 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1836 j++;
1837 }
Christian Heimes44720832008-05-26 13:01:01 +00001838
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001839 if (i == 0 && j == len && PyString_CheckExact(self)) {
1840 Py_INCREF(self);
1841 return (PyObject*)self;
1842 }
1843 else
1844 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001845}
1846
1847
1848Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001849do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001850{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001851 char *s = PyString_AS_STRING(self);
1852 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001853
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001854 i = 0;
1855 if (striptype != RIGHTSTRIP) {
1856 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1857 i++;
1858 }
1859 }
Christian Heimes44720832008-05-26 13:01:01 +00001860
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001861 j = len;
1862 if (striptype != LEFTSTRIP) {
1863 do {
1864 j--;
1865 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1866 j++;
1867 }
Christian Heimes44720832008-05-26 13:01:01 +00001868
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001869 if (i == 0 && j == len && PyString_CheckExact(self)) {
1870 Py_INCREF(self);
1871 return (PyObject*)self;
1872 }
1873 else
1874 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001875}
1876
1877
1878Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001879do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001880{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001881 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001882
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001883 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1884 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001885
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001886 if (sep != NULL && sep != Py_None) {
1887 if (PyString_Check(sep))
1888 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001889#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001890 else if (PyUnicode_Check(sep)) {
1891 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1892 PyObject *res;
1893 if (uniself==NULL)
1894 return NULL;
1895 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1896 striptype, sep);
1897 Py_DECREF(uniself);
1898 return res;
1899 }
Christian Heimes44720832008-05-26 13:01:01 +00001900#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001901 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001902#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001903 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001904#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001905 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001906#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001907 STRIPNAME(striptype));
1908 return NULL;
1909 }
Christian Heimes44720832008-05-26 13:01:01 +00001910
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001911 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001912}
1913
1914
1915PyDoc_STRVAR(strip__doc__,
1916"S.strip([chars]) -> string or unicode\n\
1917\n\
1918Return a copy of the string S with leading and trailing\n\
1919whitespace removed.\n\
1920If chars is given and not None, remove characters in chars instead.\n\
1921If chars is unicode, S will be converted to unicode before stripping");
1922
1923static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001924string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001925{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001926 if (PyTuple_GET_SIZE(args) == 0)
1927 return do_strip(self, BOTHSTRIP); /* Common case */
1928 else
1929 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001930}
1931
1932
1933PyDoc_STRVAR(lstrip__doc__,
1934"S.lstrip([chars]) -> string or unicode\n\
1935\n\
1936Return a copy of the string S with leading whitespace removed.\n\
1937If chars is given and not None, remove characters in chars instead.\n\
1938If chars is unicode, S will be converted to unicode before stripping");
1939
1940static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001941string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001942{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001943 if (PyTuple_GET_SIZE(args) == 0)
1944 return do_strip(self, LEFTSTRIP); /* Common case */
1945 else
1946 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001947}
1948
1949
1950PyDoc_STRVAR(rstrip__doc__,
1951"S.rstrip([chars]) -> string or unicode\n\
1952\n\
1953Return a copy of the string S with trailing whitespace removed.\n\
1954If chars is given and not None, remove characters in chars instead.\n\
1955If chars is unicode, S will be converted to unicode before stripping");
1956
1957static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001958string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001959{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001960 if (PyTuple_GET_SIZE(args) == 0)
1961 return do_strip(self, RIGHTSTRIP); /* Common case */
1962 else
1963 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001964}
1965
1966
1967PyDoc_STRVAR(lower__doc__,
1968"S.lower() -> string\n\
1969\n\
1970Return a copy of the string S converted to lowercase.");
1971
1972/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1973#ifndef _tolower
1974#define _tolower tolower
1975#endif
1976
1977static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001978string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001979{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001980 char *s;
1981 Py_ssize_t i, n = PyString_GET_SIZE(self);
1982 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001983
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001984 newobj = PyString_FromStringAndSize(NULL, n);
1985 if (!newobj)
1986 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001987
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001988 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001989
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001990 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001991
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001992 for (i = 0; i < n; i++) {
1993 int c = Py_CHARMASK(s[i]);
1994 if (isupper(c))
1995 s[i] = _tolower(c);
1996 }
Christian Heimes44720832008-05-26 13:01:01 +00001997
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001998 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001999}
2000
2001PyDoc_STRVAR(upper__doc__,
2002"S.upper() -> string\n\
2003\n\
2004Return a copy of the string S converted to uppercase.");
2005
2006#ifndef _toupper
2007#define _toupper toupper
2008#endif
2009
2010static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002011string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002012{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002013 char *s;
2014 Py_ssize_t i, n = PyString_GET_SIZE(self);
2015 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002016
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002017 newobj = PyString_FromStringAndSize(NULL, n);
2018 if (!newobj)
2019 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002020
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002021 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002022
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002023 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002024
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002025 for (i = 0; i < n; i++) {
2026 int c = Py_CHARMASK(s[i]);
2027 if (islower(c))
2028 s[i] = _toupper(c);
2029 }
Christian Heimes44720832008-05-26 13:01:01 +00002030
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002031 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002032}
2033
2034PyDoc_STRVAR(title__doc__,
2035"S.title() -> string\n\
2036\n\
2037Return a titlecased version of S, i.e. words start with uppercase\n\
2038characters, all remaining cased characters have lowercase.");
2039
2040static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002041string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002042{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002043 char *s = PyString_AS_STRING(self), *s_new;
2044 Py_ssize_t i, n = PyString_GET_SIZE(self);
2045 int previous_is_cased = 0;
2046 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002047
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002048 newobj = PyString_FromStringAndSize(NULL, n);
2049 if (newobj == NULL)
2050 return NULL;
2051 s_new = PyString_AsString(newobj);
2052 for (i = 0; i < n; i++) {
2053 int c = Py_CHARMASK(*s++);
2054 if (islower(c)) {
2055 if (!previous_is_cased)
2056 c = toupper(c);
2057 previous_is_cased = 1;
2058 } else if (isupper(c)) {
2059 if (previous_is_cased)
2060 c = tolower(c);
2061 previous_is_cased = 1;
2062 } else
2063 previous_is_cased = 0;
2064 *s_new++ = c;
2065 }
2066 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002067}
2068
2069PyDoc_STRVAR(capitalize__doc__,
2070"S.capitalize() -> string\n\
2071\n\
2072Return a copy of the string S with only its first character\n\
2073capitalized.");
2074
2075static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002076string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002077{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002078 char *s = PyString_AS_STRING(self), *s_new;
2079 Py_ssize_t i, n = PyString_GET_SIZE(self);
2080 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002081
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002082 newobj = PyString_FromStringAndSize(NULL, n);
2083 if (newobj == NULL)
2084 return NULL;
2085 s_new = PyString_AsString(newobj);
2086 if (0 < n) {
2087 int c = Py_CHARMASK(*s++);
2088 if (islower(c))
2089 *s_new = toupper(c);
2090 else
2091 *s_new = c;
2092 s_new++;
2093 }
2094 for (i = 1; i < n; i++) {
2095 int c = Py_CHARMASK(*s++);
2096 if (isupper(c))
2097 *s_new = tolower(c);
2098 else
2099 *s_new = c;
2100 s_new++;
2101 }
2102 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002103}
2104
2105
2106PyDoc_STRVAR(count__doc__,
2107"S.count(sub[, start[, end]]) -> int\n\
2108\n\
2109Return the number of non-overlapping occurrences of substring sub in\n\
2110string S[start:end]. Optional arguments start and end are interpreted\n\
2111as in slice notation.");
2112
2113static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002114string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002115{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002116 PyObject *sub_obj;
2117 const char *str = PyString_AS_STRING(self), *sub;
2118 Py_ssize_t sub_len;
2119 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002120
Jesus Cea44e81682011-04-20 16:39:15 +02002121 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002122 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002123
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002124 if (PyString_Check(sub_obj)) {
2125 sub = PyString_AS_STRING(sub_obj);
2126 sub_len = PyString_GET_SIZE(sub_obj);
2127 }
Christian Heimes44720832008-05-26 13:01:01 +00002128#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002129 else if (PyUnicode_Check(sub_obj)) {
2130 Py_ssize_t count;
2131 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2132 if (count == -1)
2133 return NULL;
2134 else
2135 return PyInt_FromSsize_t(count);
2136 }
Christian Heimes44720832008-05-26 13:01:01 +00002137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002138 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2139 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002140
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002141 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002142
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002143 return PyInt_FromSsize_t(
2144 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2145 );
Christian Heimes44720832008-05-26 13:01:01 +00002146}
2147
2148PyDoc_STRVAR(swapcase__doc__,
2149"S.swapcase() -> string\n\
2150\n\
2151Return a copy of the string S with uppercase characters\n\
2152converted to lowercase and vice versa.");
2153
2154static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002155string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002156{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002157 char *s = PyString_AS_STRING(self), *s_new;
2158 Py_ssize_t i, n = PyString_GET_SIZE(self);
2159 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002161 newobj = PyString_FromStringAndSize(NULL, n);
2162 if (newobj == NULL)
2163 return NULL;
2164 s_new = PyString_AsString(newobj);
2165 for (i = 0; i < n; i++) {
2166 int c = Py_CHARMASK(*s++);
2167 if (islower(c)) {
2168 *s_new = toupper(c);
2169 }
2170 else if (isupper(c)) {
2171 *s_new = tolower(c);
2172 }
2173 else
2174 *s_new = c;
2175 s_new++;
2176 }
2177 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002178}
2179
2180
2181PyDoc_STRVAR(translate__doc__,
2182"S.translate(table [,deletechars]) -> string\n\
2183\n\
2184Return a copy of the string S, where all characters occurring\n\
2185in the optional argument deletechars are removed, and the\n\
2186remaining characters have been mapped through the given\n\
Mark Dickinsoncb9bf1a2011-06-25 11:00:12 +02002187translation table, which must be a string of length 256 or None.\n\
2188If the table argument is None, no translation is applied and\n\
2189the operation simply removes the characters in deletechars.");
Christian Heimes44720832008-05-26 13:01:01 +00002190
2191static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002192string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002193{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002194 register char *input, *output;
2195 const char *table;
2196 register Py_ssize_t i, c, changed = 0;
2197 PyObject *input_obj = (PyObject*)self;
2198 const char *output_start, *del_table=NULL;
2199 Py_ssize_t inlen, tablen, dellen = 0;
2200 PyObject *result;
2201 int trans_table[256];
2202 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002203
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002204 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2205 &tableobj, &delobj))
2206 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002207
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002208 if (PyString_Check(tableobj)) {
2209 table = PyString_AS_STRING(tableobj);
2210 tablen = PyString_GET_SIZE(tableobj);
2211 }
2212 else if (tableobj == Py_None) {
2213 table = NULL;
2214 tablen = 256;
2215 }
Christian Heimes44720832008-05-26 13:01:01 +00002216#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002217 else if (PyUnicode_Check(tableobj)) {
2218 /* Unicode .translate() does not support the deletechars
2219 parameter; instead a mapping to None will cause characters
2220 to be deleted. */
2221 if (delobj != NULL) {
2222 PyErr_SetString(PyExc_TypeError,
2223 "deletions are implemented differently for unicode");
2224 return NULL;
2225 }
2226 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2227 }
Christian Heimes44720832008-05-26 13:01:01 +00002228#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002229 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2230 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002231
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002232 if (tablen != 256) {
2233 PyErr_SetString(PyExc_ValueError,
2234 "translation table must be 256 characters long");
2235 return NULL;
2236 }
Christian Heimes44720832008-05-26 13:01:01 +00002237
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002238 if (delobj != NULL) {
2239 if (PyString_Check(delobj)) {
2240 del_table = PyString_AS_STRING(delobj);
2241 dellen = PyString_GET_SIZE(delobj);
2242 }
Christian Heimes44720832008-05-26 13:01:01 +00002243#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002244 else if (PyUnicode_Check(delobj)) {
2245 PyErr_SetString(PyExc_TypeError,
2246 "deletions are implemented differently for unicode");
2247 return NULL;
2248 }
Christian Heimes44720832008-05-26 13:01:01 +00002249#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002250 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2251 return NULL;
2252 }
2253 else {
2254 del_table = NULL;
2255 dellen = 0;
2256 }
Christian Heimes44720832008-05-26 13:01:01 +00002257
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002258 inlen = PyString_GET_SIZE(input_obj);
2259 result = PyString_FromStringAndSize((char *)NULL, inlen);
2260 if (result == NULL)
2261 return NULL;
2262 output_start = output = PyString_AsString(result);
2263 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002264
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002265 if (dellen == 0 && table != NULL) {
2266 /* If no deletions are required, use faster code */
2267 for (i = inlen; --i >= 0; ) {
2268 c = Py_CHARMASK(*input++);
2269 if (Py_CHARMASK((*output++ = table[c])) != c)
2270 changed = 1;
2271 }
2272 if (changed || !PyString_CheckExact(input_obj))
2273 return result;
2274 Py_DECREF(result);
2275 Py_INCREF(input_obj);
2276 return input_obj;
2277 }
Christian Heimes44720832008-05-26 13:01:01 +00002278
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002279 if (table == NULL) {
2280 for (i = 0; i < 256; i++)
2281 trans_table[i] = Py_CHARMASK(i);
2282 } else {
2283 for (i = 0; i < 256; i++)
2284 trans_table[i] = Py_CHARMASK(table[i]);
2285 }
Christian Heimes44720832008-05-26 13:01:01 +00002286
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002287 for (i = 0; i < dellen; i++)
2288 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002289
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002290 for (i = inlen; --i >= 0; ) {
2291 c = Py_CHARMASK(*input++);
2292 if (trans_table[c] != -1)
2293 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2294 continue;
2295 changed = 1;
2296 }
2297 if (!changed && PyString_CheckExact(input_obj)) {
2298 Py_DECREF(result);
2299 Py_INCREF(input_obj);
2300 return input_obj;
2301 }
2302 /* Fix the size of the resulting string */
2303 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2304 return NULL;
2305 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002306}
2307
2308
/* find and count characters and substrings */

/* findchar(target, target_len, c): thin wrapper around memchr() that
 * yields a char * to the first byte equal to c within the first target_len
 * bytes of target, or NULL when there is none.
 */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2313
2314/* String ops must return a string. */
2315/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002316Py_LOCAL(PyStringObject *)
2317return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002318{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002319 if (PyString_CheckExact(self)) {
2320 Py_INCREF(self);
2321 return self;
2322 }
2323 return (PyStringObject *)PyString_FromStringAndSize(
2324 PyString_AS_STRING(self),
2325 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002326}
2327
2328Py_LOCAL_INLINE(Py_ssize_t)
2329countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2330{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002331 Py_ssize_t count=0;
2332 const char *start=target;
2333 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002334
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002335 while ( (start=findchar(start, end-start, c)) != NULL ) {
2336 count++;
2337 if (count >= maxcount)
2338 break;
2339 start += 1;
2340 }
2341 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002342}
2343
Christian Heimes44720832008-05-26 13:01:01 +00002344
2345/* Algorithms for different cases of string replacement */
2346
2347/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002348Py_LOCAL(PyStringObject *)
2349replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002350 const char *to_s, Py_ssize_t to_len,
2351 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002352{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002353 char *self_s, *result_s;
2354 Py_ssize_t self_len, result_len;
2355 Py_ssize_t count, i, product;
2356 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002357
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002358 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002359
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002360 /* 1 at the end plus 1 after every character */
2361 count = self_len+1;
2362 if (maxcount < count)
2363 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002364
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002365 /* Check for overflow */
2366 /* result_len = count * to_len + self_len; */
2367 product = count * to_len;
2368 if (product / to_len != count) {
2369 PyErr_SetString(PyExc_OverflowError,
2370 "replace string is too long");
2371 return NULL;
2372 }
2373 result_len = product + self_len;
2374 if (result_len < 0) {
2375 PyErr_SetString(PyExc_OverflowError,
2376 "replace string is too long");
2377 return NULL;
2378 }
Christian Heimes44720832008-05-26 13:01:01 +00002379
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002380 if (! (result = (PyStringObject *)
2381 PyString_FromStringAndSize(NULL, result_len)) )
2382 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002383
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002384 self_s = PyString_AS_STRING(self);
2385 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002386
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002387 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002388
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002389 /* Lay the first one down (guaranteed this will occur) */
2390 Py_MEMCPY(result_s, to_s, to_len);
2391 result_s += to_len;
2392 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002393
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002394 for (i=0; i<count; i++) {
2395 *result_s++ = *self_s++;
2396 Py_MEMCPY(result_s, to_s, to_len);
2397 result_s += to_len;
2398 }
2399
2400 /* Copy the rest of the original string */
2401 Py_MEMCPY(result_s, self_s, self_len-i);
2402
2403 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002404}
2405
2406/* Special case for deleting a single character */
2407/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002408Py_LOCAL(PyStringObject *)
2409replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002410 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002411{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002412 char *self_s, *result_s;
2413 char *start, *next, *end;
2414 Py_ssize_t self_len, result_len;
2415 Py_ssize_t count;
2416 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002417
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002418 self_len = PyString_GET_SIZE(self);
2419 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002420
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002421 count = countchar(self_s, self_len, from_c, maxcount);
2422 if (count == 0) {
2423 return return_self(self);
2424 }
Christian Heimes44720832008-05-26 13:01:01 +00002425
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002426 result_len = self_len - count; /* from_len == 1 */
2427 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002428
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002429 if ( (result = (PyStringObject *)
2430 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2431 return NULL;
2432 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002433
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002434 start = self_s;
2435 end = self_s + self_len;
2436 while (count-- > 0) {
2437 next = findchar(start, end-start, from_c);
2438 if (next == NULL)
2439 break;
2440 Py_MEMCPY(result_s, start, next-start);
2441 result_s += (next-start);
2442 start = next+1;
2443 }
2444 Py_MEMCPY(result_s, start, end-start);
2445
2446 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002447}
2448
2449/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2450
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002451Py_LOCAL(PyStringObject *)
2452replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002453 const char *from_s, Py_ssize_t from_len,
2454 Py_ssize_t maxcount) {
2455 char *self_s, *result_s;
2456 char *start, *next, *end;
2457 Py_ssize_t self_len, result_len;
2458 Py_ssize_t count, offset;
2459 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002460
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002461 self_len = PyString_GET_SIZE(self);
2462 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002463
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002464 count = stringlib_count(self_s, self_len,
2465 from_s, from_len,
2466 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002467
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002468 if (count == 0) {
2469 /* no matches */
2470 return return_self(self);
2471 }
Christian Heimes44720832008-05-26 13:01:01 +00002472
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002473 result_len = self_len - (count * from_len);
2474 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002475
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002476 if ( (result = (PyStringObject *)
2477 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2478 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002479
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002480 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002481
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002482 start = self_s;
2483 end = self_s + self_len;
2484 while (count-- > 0) {
2485 offset = stringlib_find(start, end-start,
2486 from_s, from_len,
2487 0);
2488 if (offset == -1)
2489 break;
2490 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002491
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002492 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002493
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002494 result_s += (next-start);
2495 start = next+from_len;
2496 }
2497 Py_MEMCPY(result_s, start, end-start);
2498 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002499}
2500
2501/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002502Py_LOCAL(PyStringObject *)
2503replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002504 char from_c, char to_c,
2505 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002506{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002507 char *self_s, *result_s, *start, *end, *next;
2508 Py_ssize_t self_len;
2509 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002510
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002511 /* The result string will be the same size */
2512 self_s = PyString_AS_STRING(self);
2513 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002514
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002515 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002516
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002517 if (next == NULL) {
2518 /* No matches; return the original string */
2519 return return_self(self);
2520 }
Christian Heimes44720832008-05-26 13:01:01 +00002521
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002522 /* Need to make a new string */
2523 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2524 if (result == NULL)
2525 return NULL;
2526 result_s = PyString_AS_STRING(result);
2527 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002528
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002529 /* change everything in-place, starting with this one */
2530 start = result_s + (next-self_s);
2531 *start = to_c;
2532 start++;
2533 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002534
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002535 while (--maxcount > 0) {
2536 next = findchar(start, end-start, from_c);
2537 if (next == NULL)
2538 break;
2539 *next = to_c;
2540 start = next+1;
2541 }
Christian Heimes44720832008-05-26 13:01:01 +00002542
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002543 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002544}
2545
2546/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002547Py_LOCAL(PyStringObject *)
2548replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002549 const char *from_s, Py_ssize_t from_len,
2550 const char *to_s, Py_ssize_t to_len,
2551 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002552{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002553 char *result_s, *start, *end;
2554 char *self_s;
2555 Py_ssize_t self_len, offset;
2556 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002557
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002558 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002559
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002560 self_s = PyString_AS_STRING(self);
2561 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002562
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002563 offset = stringlib_find(self_s, self_len,
2564 from_s, from_len,
2565 0);
2566 if (offset == -1) {
2567 /* No matches; return the original string */
2568 return return_self(self);
2569 }
Christian Heimes44720832008-05-26 13:01:01 +00002570
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002571 /* Need to make a new string */
2572 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2573 if (result == NULL)
2574 return NULL;
2575 result_s = PyString_AS_STRING(result);
2576 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002577
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002578 /* change everything in-place, starting with this one */
2579 start = result_s + offset;
2580 Py_MEMCPY(start, to_s, from_len);
2581 start += from_len;
2582 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002583
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002584 while ( --maxcount > 0) {
2585 offset = stringlib_find(start, end-start,
2586 from_s, from_len,
2587 0);
2588 if (offset==-1)
2589 break;
2590 Py_MEMCPY(start+offset, to_s, from_len);
2591 start += offset+from_len;
2592 }
Christian Heimes44720832008-05-26 13:01:01 +00002593
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002594 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002595}
2596
2597/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002598Py_LOCAL(PyStringObject *)
2599replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002600 char from_c,
2601 const char *to_s, Py_ssize_t to_len,
2602 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002603{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002604 char *self_s, *result_s;
2605 char *start, *next, *end;
2606 Py_ssize_t self_len, result_len;
2607 Py_ssize_t count, product;
2608 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002609
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002610 self_s = PyString_AS_STRING(self);
2611 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002612
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002613 count = countchar(self_s, self_len, from_c, maxcount);
2614 if (count == 0) {
2615 /* no matches, return unchanged */
2616 return return_self(self);
2617 }
Christian Heimes44720832008-05-26 13:01:01 +00002618
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002619 /* use the difference between current and new, hence the "-1" */
2620 /* result_len = self_len + count * (to_len-1) */
2621 product = count * (to_len-1);
2622 if (product / (to_len-1) != count) {
2623 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2624 return NULL;
2625 }
2626 result_len = self_len + product;
2627 if (result_len < 0) {
2628 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2629 return NULL;
2630 }
Christian Heimes44720832008-05-26 13:01:01 +00002631
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002632 if ( (result = (PyStringObject *)
2633 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2634 return NULL;
2635 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002636
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002637 start = self_s;
2638 end = self_s + self_len;
2639 while (count-- > 0) {
2640 next = findchar(start, end-start, from_c);
2641 if (next == NULL)
2642 break;
Christian Heimes44720832008-05-26 13:01:01 +00002643
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002644 if (next == start) {
2645 /* replace with the 'to' */
2646 Py_MEMCPY(result_s, to_s, to_len);
2647 result_s += to_len;
2648 start += 1;
2649 } else {
2650 /* copy the unchanged old then the 'to' */
2651 Py_MEMCPY(result_s, start, next-start);
2652 result_s += (next-start);
2653 Py_MEMCPY(result_s, to_s, to_len);
2654 result_s += to_len;
2655 start = next+1;
2656 }
2657 }
2658 /* Copy the remainder of the remaining string */
2659 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002660
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002661 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002662}
2663
2664/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002665Py_LOCAL(PyStringObject *)
2666replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002667 const char *from_s, Py_ssize_t from_len,
2668 const char *to_s, Py_ssize_t to_len,
2669 Py_ssize_t maxcount) {
2670 char *self_s, *result_s;
2671 char *start, *next, *end;
2672 Py_ssize_t self_len, result_len;
2673 Py_ssize_t count, offset, product;
2674 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002675
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002676 self_s = PyString_AS_STRING(self);
2677 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002678
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002679 count = stringlib_count(self_s, self_len,
2680 from_s, from_len,
2681 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002682
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002683 if (count == 0) {
2684 /* no matches, return unchanged */
2685 return return_self(self);
2686 }
Christian Heimes44720832008-05-26 13:01:01 +00002687
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002688 /* Check for overflow */
2689 /* result_len = self_len + count * (to_len-from_len) */
2690 product = count * (to_len-from_len);
2691 if (product / (to_len-from_len) != count) {
2692 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2693 return NULL;
2694 }
2695 result_len = self_len + product;
2696 if (result_len < 0) {
2697 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2698 return NULL;
2699 }
Christian Heimes44720832008-05-26 13:01:01 +00002700
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002701 if ( (result = (PyStringObject *)
2702 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2703 return NULL;
2704 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002705
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002706 start = self_s;
2707 end = self_s + self_len;
2708 while (count-- > 0) {
2709 offset = stringlib_find(start, end-start,
2710 from_s, from_len,
2711 0);
2712 if (offset == -1)
2713 break;
2714 next = start+offset;
2715 if (next == start) {
2716 /* replace with the 'to' */
2717 Py_MEMCPY(result_s, to_s, to_len);
2718 result_s += to_len;
2719 start += from_len;
2720 } else {
2721 /* copy the unchanged old then the 'to' */
2722 Py_MEMCPY(result_s, start, next-start);
2723 result_s += (next-start);
2724 Py_MEMCPY(result_s, to_s, to_len);
2725 result_s += to_len;
2726 start = next+from_len;
2727 }
2728 }
2729 /* Copy the remainder of the remaining string */
2730 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002731
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002732 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002733}
2734
2735
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002736Py_LOCAL(PyStringObject *)
2737replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002738 const char *from_s, Py_ssize_t from_len,
2739 const char *to_s, Py_ssize_t to_len,
2740 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002741{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002742 if (maxcount < 0) {
2743 maxcount = PY_SSIZE_T_MAX;
2744 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2745 /* nothing to do; return the original string */
2746 return return_self(self);
2747 }
Christian Heimes44720832008-05-26 13:01:01 +00002748
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002749 if (maxcount == 0 ||
2750 (from_len == 0 && to_len == 0)) {
2751 /* nothing to do; return the original string */
2752 return return_self(self);
2753 }
Christian Heimes44720832008-05-26 13:01:01 +00002754
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002755 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002756
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002757 if (from_len == 0) {
2758 /* insert the 'to' string everywhere. */
2759 /* >>> "Python".replace("", ".") */
2760 /* '.P.y.t.h.o.n.' */
2761 return replace_interleave(self, to_s, to_len, maxcount);
2762 }
Christian Heimes44720832008-05-26 13:01:01 +00002763
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002764 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2765 /* point for an empty self string to generate a non-empty string */
2766 /* Special case so the remaining code always gets a non-empty string */
2767 if (PyString_GET_SIZE(self) == 0) {
2768 return return_self(self);
2769 }
Christian Heimes44720832008-05-26 13:01:01 +00002770
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002771 if (to_len == 0) {
2772 /* delete all occurances of 'from' string */
2773 if (from_len == 1) {
2774 return replace_delete_single_character(
2775 self, from_s[0], maxcount);
2776 } else {
2777 return replace_delete_substring(self, from_s, from_len, maxcount);
2778 }
2779 }
Christian Heimes44720832008-05-26 13:01:01 +00002780
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002781 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002782
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002783 if (from_len == to_len) {
2784 if (from_len == 1) {
2785 return replace_single_character_in_place(
2786 self,
2787 from_s[0],
2788 to_s[0],
2789 maxcount);
2790 } else {
2791 return replace_substring_in_place(
2792 self, from_s, from_len, to_s, to_len, maxcount);
2793 }
2794 }
Christian Heimes44720832008-05-26 13:01:01 +00002795
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002796 /* Otherwise use the more generic algorithms */
2797 if (from_len == 1) {
2798 return replace_single_character(self, from_s[0],
2799 to_s, to_len, maxcount);
2800 } else {
2801 /* len('from')>=2, len('to')>=1 */
2802 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2803 }
Christian Heimes44720832008-05-26 13:01:01 +00002804}
2805
2806PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002807"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002808\n\
2809Return a copy of string S with all occurrences of substring\n\
2810old replaced by new. If the optional argument count is\n\
2811given, only the first count occurrences are replaced.");
2812
2813static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002814string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002815{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002816 Py_ssize_t count = -1;
2817 PyObject *from, *to;
2818 const char *from_s, *to_s;
2819 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002820
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002821 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2822 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002823
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002824 if (PyString_Check(from)) {
2825 from_s = PyString_AS_STRING(from);
2826 from_len = PyString_GET_SIZE(from);
2827 }
Christian Heimes44720832008-05-26 13:01:01 +00002828#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002829 if (PyUnicode_Check(from))
2830 return PyUnicode_Replace((PyObject *)self,
2831 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002832#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002833 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2834 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002835
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002836 if (PyString_Check(to)) {
2837 to_s = PyString_AS_STRING(to);
2838 to_len = PyString_GET_SIZE(to);
2839 }
Christian Heimes44720832008-05-26 13:01:01 +00002840#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002841 else if (PyUnicode_Check(to))
2842 return PyUnicode_Replace((PyObject *)self,
2843 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002844#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002845 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2846 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002847
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002848 return (PyObject *)replace((PyStringObject *) self,
2849 from_s, from_len,
2850 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002851}
2852
2853/** End DALKE **/
2854
2855/* Matches the end (direction >= 0) or start (direction < 0) of self
2856 * against substr, using the start and end arguments. Returns
2857 * -1 on error, 0 if not found and 1 if found.
2858 */
2859Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002860_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002861 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002862{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002863 Py_ssize_t len = PyString_GET_SIZE(self);
2864 Py_ssize_t slen;
2865 const char* sub;
2866 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002867
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002868 if (PyString_Check(substr)) {
2869 sub = PyString_AS_STRING(substr);
2870 slen = PyString_GET_SIZE(substr);
2871 }
Christian Heimes44720832008-05-26 13:01:01 +00002872#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002873 else if (PyUnicode_Check(substr))
2874 return PyUnicode_Tailmatch((PyObject *)self,
2875 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002876#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002877 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2878 return -1;
2879 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002880
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002881 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002882
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002883 if (direction < 0) {
2884 /* startswith */
2885 if (start+slen > len)
2886 return 0;
2887 } else {
2888 /* endswith */
2889 if (end-start < slen || start > len)
2890 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002891
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002892 if (end-slen > start)
2893 start = end - slen;
2894 }
2895 if (end-start >= slen)
2896 return ! memcmp(str+start, sub, slen);
2897 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002898}
2899
2900
2901PyDoc_STRVAR(startswith__doc__,
2902"S.startswith(prefix[, start[, end]]) -> bool\n\
2903\n\
2904Return True if S starts with the specified prefix, False otherwise.\n\
2905With optional start, test S beginning at that position.\n\
2906With optional end, stop comparing S at that position.\n\
2907prefix can also be a tuple of strings to try.");
2908
2909static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002910string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002911{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002912 Py_ssize_t start = 0;
2913 Py_ssize_t end = PY_SSIZE_T_MAX;
2914 PyObject *subobj;
2915 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002916
Jesus Cea44e81682011-04-20 16:39:15 +02002917 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002918 return NULL;
2919 if (PyTuple_Check(subobj)) {
2920 Py_ssize_t i;
2921 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2922 result = _string_tailmatch(self,
2923 PyTuple_GET_ITEM(subobj, i),
2924 start, end, -1);
2925 if (result == -1)
2926 return NULL;
2927 else if (result) {
2928 Py_RETURN_TRUE;
2929 }
2930 }
2931 Py_RETURN_FALSE;
2932 }
2933 result = _string_tailmatch(self, subobj, start, end, -1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002934 if (result == -1) {
2935 if (PyErr_ExceptionMatches(PyExc_TypeError))
2936 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2937 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002938 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002939 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002940 else
2941 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002942}
2943
2944
2945PyDoc_STRVAR(endswith__doc__,
2946"S.endswith(suffix[, start[, end]]) -> bool\n\
2947\n\
2948Return True if S ends with the specified suffix, False otherwise.\n\
2949With optional start, test S beginning at that position.\n\
2950With optional end, stop comparing S at that position.\n\
2951suffix can also be a tuple of strings to try.");
2952
2953static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002954string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002955{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002956 Py_ssize_t start = 0;
2957 Py_ssize_t end = PY_SSIZE_T_MAX;
2958 PyObject *subobj;
2959 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002960
Jesus Cea44e81682011-04-20 16:39:15 +02002961 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002962 return NULL;
2963 if (PyTuple_Check(subobj)) {
2964 Py_ssize_t i;
2965 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2966 result = _string_tailmatch(self,
2967 PyTuple_GET_ITEM(subobj, i),
2968 start, end, +1);
2969 if (result == -1)
2970 return NULL;
2971 else if (result) {
2972 Py_RETURN_TRUE;
2973 }
2974 }
2975 Py_RETURN_FALSE;
2976 }
2977 result = _string_tailmatch(self, subobj, start, end, +1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002978 if (result == -1) {
2979 if (PyErr_ExceptionMatches(PyExc_TypeError))
2980 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2981 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002982 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002983 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002984 else
2985 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002986}
2987
2988
2989PyDoc_STRVAR(encode__doc__,
2990"S.encode([encoding[,errors]]) -> object\n\
2991\n\
2992Encodes S using the codec registered for encoding. encoding defaults\n\
2993to the default encoding. errors may be given to set a different error\n\
2994handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2995a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2996'xmlcharrefreplace' as well as any other name registered with\n\
2997codecs.register_error that is able to handle UnicodeEncodeErrors.");
2998
2999static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003000string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003001{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003002 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003003 char *encoding = NULL;
3004 char *errors = NULL;
3005 PyObject *v;
3006
Benjamin Peterson332d7212009-09-18 21:14:55 +00003007 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003008 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003009 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003010 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003011 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003012 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003013 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003014 PyErr_Format(PyExc_TypeError,
3015 "encoder did not return a string/unicode object "
3016 "(type=%.400s)",
3017 Py_TYPE(v)->tp_name);
3018 Py_DECREF(v);
3019 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003020 }
3021 return v;
3022
3023 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003024 return NULL;
3025}
3026
Christian Heimes44720832008-05-26 13:01:01 +00003027
3028PyDoc_STRVAR(decode__doc__,
3029"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003030\n\
Christian Heimes44720832008-05-26 13:01:01 +00003031Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003032to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003033handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3034a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003035as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003036able to handle UnicodeDecodeErrors.");
3037
3038static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003039string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003040{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003041 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003042 char *encoding = NULL;
3043 char *errors = NULL;
3044 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003045
Benjamin Peterson332d7212009-09-18 21:14:55 +00003046 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003047 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003048 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003049 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003050 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003051 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003052 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003053 PyErr_Format(PyExc_TypeError,
3054 "decoder did not return a string/unicode object "
3055 "(type=%.400s)",
3056 Py_TYPE(v)->tp_name);
3057 Py_DECREF(v);
3058 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003059 }
3060 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003061
Christian Heimes44720832008-05-26 13:01:01 +00003062 onError:
3063 return NULL;
3064}
3065
3066
3067PyDoc_STRVAR(expandtabs__doc__,
3068"S.expandtabs([tabsize]) -> string\n\
3069\n\
3070Return a copy of S where all tab characters are expanded using spaces.\n\
3071If tabsize is not given, a tab size of 8 characters is assumed.");
3072
3073static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003074string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003075{
3076 const char *e, *p, *qe;
3077 char *q;
3078 Py_ssize_t i, j, incr;
3079 PyObject *u;
3080 int tabsize = 8;
3081
3082 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003083 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003084
3085 /* First pass: determine size of output string */
3086 i = 0; /* chars up to and including most recent \n or \r */
3087 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003088 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3089 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003090 if (*p == '\t') {
3091 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003092 incr = tabsize - (j % tabsize);
3093 if (j > PY_SSIZE_T_MAX - incr)
3094 goto overflow1;
3095 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003096 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003097 }
3098 else {
3099 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003100 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003101 j++;
3102 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003103 if (i > PY_SSIZE_T_MAX - j)
3104 goto overflow1;
3105 i += j;
3106 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003107 }
3108 }
Christian Heimes44720832008-05-26 13:01:01 +00003109
3110 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003111 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003112
3113 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003114 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003115 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003116 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003117
3118 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003119 q = PyString_AS_STRING(u); /* next output char */
3120 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003121
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003122 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003123 if (*p == '\t') {
3124 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003125 i = tabsize - (j % tabsize);
3126 j += i;
3127 while (i--) {
3128 if (q >= qe)
3129 goto overflow2;
3130 *q++ = ' ';
3131 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003132 }
3133 }
3134 else {
3135 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003136 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003137 *q++ = *p;
3138 j++;
3139 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003140 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003141 }
Christian Heimes44720832008-05-26 13:01:01 +00003142
3143 return u;
3144
3145 overflow2:
3146 Py_DECREF(u);
3147 overflow1:
3148 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3149 return NULL;
3150}
3151
3152Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003153pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003154{
3155 PyObject *u;
3156
3157 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003158 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003159 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003160 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003161
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003162 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003163 Py_INCREF(self);
3164 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003165 }
3166
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003167 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003168 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003169 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003170 if (left)
3171 memset(PyString_AS_STRING(u), fill, left);
3172 Py_MEMCPY(PyString_AS_STRING(u) + left,
3173 PyString_AS_STRING(self),
3174 PyString_GET_SIZE(self));
3175 if (right)
3176 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3177 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003178 }
3179
3180 return u;
3181}
3182
3183PyDoc_STRVAR(ljust__doc__,
3184"S.ljust(width[, fillchar]) -> string\n"
3185"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003186"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003187"done using the specified fill character (default is a space).");
3188
3189static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003190string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003191{
3192 Py_ssize_t width;
3193 char fillchar = ' ';
3194
3195 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003196 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003197
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003198 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003199 Py_INCREF(self);
3200 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003201 }
3202
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003203 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003204}
3205
3206
3207PyDoc_STRVAR(rjust__doc__,
3208"S.rjust(width[, fillchar]) -> string\n"
3209"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003210"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003211"done using the specified fill character (default is a space)");
3212
3213static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003214string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003215{
3216 Py_ssize_t width;
3217 char fillchar = ' ';
3218
3219 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003220 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003221
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003222 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003223 Py_INCREF(self);
3224 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003225 }
3226
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003227 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003228}
3229
3230
3231PyDoc_STRVAR(center__doc__,
3232"S.center(width[, fillchar]) -> string\n"
3233"\n"
3234"Return S centered in a string of length width. Padding is\n"
3235"done using the specified fill character (default is a space)");
3236
3237static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003238string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003239{
3240 Py_ssize_t marg, left;
3241 Py_ssize_t width;
3242 char fillchar = ' ';
3243
3244 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003245 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003246
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003247 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003248 Py_INCREF(self);
3249 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003250 }
3251
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003252 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003253 left = marg / 2 + (marg & width & 1);
3254
3255 return pad(self, left, marg - left, fillchar);
3256}
3257
3258PyDoc_STRVAR(zfill__doc__,
3259"S.zfill(width) -> string\n"
3260"\n"
3261"Pad a numeric string S with zeros on the left, to fill a field\n"
3262"of the specified width. The string S is never truncated.");
3263
3264static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003265string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003266{
3267 Py_ssize_t fill;
3268 PyObject *s;
3269 char *p;
3270 Py_ssize_t width;
3271
3272 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003273 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003274
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003275 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003276 if (PyString_CheckExact(self)) {
3277 Py_INCREF(self);
3278 return (PyObject*) self;
3279 }
3280 else
3281 return PyString_FromStringAndSize(
3282 PyString_AS_STRING(self),
3283 PyString_GET_SIZE(self)
3284 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003285 }
3286
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003287 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003288
Christian Heimes44720832008-05-26 13:01:01 +00003289 s = pad(self, fill, 0, '0');
3290
3291 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003292 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003293
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003294 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003295 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003296 /* move sign to beginning of string */
3297 p[0] = p[fill];
3298 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003299 }
3300
3301 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003302}
3303
Christian Heimes44720832008-05-26 13:01:01 +00003304PyDoc_STRVAR(isspace__doc__,
3305"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003306\n\
Christian Heimes44720832008-05-26 13:01:01 +00003307Return True if all characters in S are whitespace\n\
3308and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003309
Christian Heimes44720832008-05-26 13:01:01 +00003310static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003311string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003312{
Christian Heimes44720832008-05-26 13:01:01 +00003313 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003314 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003315 register const unsigned char *e;
3316
3317 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003318 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003319 isspace(*p))
3320 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003321
3322 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003323 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003324 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003325
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003326 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003327 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003328 if (!isspace(*p))
3329 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003330 }
Christian Heimes44720832008-05-26 13:01:01 +00003331 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003332}
3333
Christian Heimes44720832008-05-26 13:01:01 +00003334
3335PyDoc_STRVAR(isalpha__doc__,
3336"S.isalpha() -> bool\n\
3337\n\
3338Return True if all characters in S are alphabetic\n\
3339and there is at least one character in S, False otherwise.");
3340
3341static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003342string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003343{
Christian Heimes44720832008-05-26 13:01:01 +00003344 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003345 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003346 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003347
Christian Heimes44720832008-05-26 13:01:01 +00003348 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003349 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003350 isalpha(*p))
3351 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003352
3353 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003354 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003355 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003356
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003357 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003358 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003359 if (!isalpha(*p))
3360 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003361 }
Christian Heimes44720832008-05-26 13:01:01 +00003362 return PyBool_FromLong(1);
3363}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003364
Christian Heimes44720832008-05-26 13:01:01 +00003365
3366PyDoc_STRVAR(isalnum__doc__,
3367"S.isalnum() -> bool\n\
3368\n\
3369Return True if all characters in S are alphanumeric\n\
3370and there is at least one character in S, False otherwise.");
3371
3372static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003373string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003374{
3375 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003376 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003377 register const unsigned char *e;
3378
3379 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003380 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003381 isalnum(*p))
3382 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003383
3384 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003385 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003386 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003387
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003388 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003389 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003390 if (!isalnum(*p))
3391 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003392 }
3393 return PyBool_FromLong(1);
3394}
3395
3396
3397PyDoc_STRVAR(isdigit__doc__,
3398"S.isdigit() -> bool\n\
3399\n\
3400Return True if all characters in S are digits\n\
3401and there is at least one character in S, False otherwise.");
3402
3403static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003404string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003405{
3406 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003407 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003408 register const unsigned char *e;
3409
3410 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003411 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003412 isdigit(*p))
3413 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003414
3415 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003416 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003417 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003418
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003419 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003420 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003421 if (!isdigit(*p))
3422 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003423 }
3424 return PyBool_FromLong(1);
3425}
3426
3427
3428PyDoc_STRVAR(islower__doc__,
3429"S.islower() -> bool\n\
3430\n\
3431Return True if all cased characters in S are lowercase and there is\n\
3432at least one cased character in S, False otherwise.");
3433
3434static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003435string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003436{
3437 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003438 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003439 register const unsigned char *e;
3440 int cased;
3441
3442 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003443 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003444 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003445
3446 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003447 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003448 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003449
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003450 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003451 cased = 0;
3452 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003453 if (isupper(*p))
3454 return PyBool_FromLong(0);
3455 else if (!cased && islower(*p))
3456 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003457 }
3458 return PyBool_FromLong(cased);
3459}
3460
3461
3462PyDoc_STRVAR(isupper__doc__,
3463"S.isupper() -> bool\n\
3464\n\
3465Return True if all cased characters in S are uppercase and there is\n\
3466at least one cased character in S, False otherwise.");
3467
3468static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003469string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003470{
3471 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003472 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003473 register const unsigned char *e;
3474 int cased;
3475
3476 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003477 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003478 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003479
3480 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003481 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003482 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003483
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003484 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003485 cased = 0;
3486 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003487 if (islower(*p))
3488 return PyBool_FromLong(0);
3489 else if (!cased && isupper(*p))
3490 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003491 }
3492 return PyBool_FromLong(cased);
3493}
3494
3495
3496PyDoc_STRVAR(istitle__doc__,
3497"S.istitle() -> bool\n\
3498\n\
3499Return True if S is a titlecased string and there is at least one\n\
3500character in S, i.e. uppercase characters may only follow uncased\n\
3501characters and lowercase characters only cased ones. Return False\n\
3502otherwise.");
3503
3504static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003505string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003506{
3507 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003508 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003509 register const unsigned char *e;
3510 int cased, previous_is_cased;
3511
3512 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003513 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003514 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003515
3516 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003517 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003518 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003519
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003520 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003521 cased = 0;
3522 previous_is_cased = 0;
3523 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003524 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003525
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003526 if (isupper(ch)) {
3527 if (previous_is_cased)
3528 return PyBool_FromLong(0);
3529 previous_is_cased = 1;
3530 cased = 1;
3531 }
3532 else if (islower(ch)) {
3533 if (!previous_is_cased)
3534 return PyBool_FromLong(0);
3535 previous_is_cased = 1;
3536 cased = 1;
3537 }
3538 else
3539 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003540 }
3541 return PyBool_FromLong(cased);
3542}
3543
3544
3545PyDoc_STRVAR(splitlines__doc__,
3546"S.splitlines([keepends]) -> list of strings\n\
3547\n\
3548Return a list of the lines in S, breaking at line boundaries.\n\
3549Line breaks are not included in the resulting list unless keepends\n\
3550is given and true.");
3551
3552static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003553string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003554{
Christian Heimes44720832008-05-26 13:01:01 +00003555 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003556
3557 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003558 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003559
Antoine Pitrou64672132010-01-13 07:55:48 +00003560 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003561 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3562 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003563 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003564}
3565
Robert Schuppenies51df0642008-06-01 16:16:17 +00003566PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003567"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003568
3569static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003570string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003571{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003572 Py_ssize_t res;
3573 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3574 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003575}
3576
Christian Heimes1a6387e2008-03-26 12:49:49 +00003577static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003578string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003579{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003580 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003581}
3582
Christian Heimes1a6387e2008-03-26 12:49:49 +00003583
Christian Heimes44720832008-05-26 13:01:01 +00003584#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003585
Christian Heimes44720832008-05-26 13:01:01 +00003586PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003587"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003588\n\
Eric Smith6c840852010-11-06 19:43:44 +00003589Return a formatted version of S, using substitutions from args and kwargs.\n\
3590The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003591
Eric Smithdc13b792008-05-30 18:10:04 +00003592static PyObject *
3593string__format__(PyObject* self, PyObject* args)
3594{
3595 PyObject *format_spec;
3596 PyObject *result = NULL;
3597 PyObject *tmp = NULL;
3598
3599 /* If 2.x, convert format_spec to the same type as value */
3600 /* This is to allow things like u''.format('') */
3601 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003602 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003603 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003604 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3605 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3606 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003607 }
3608 tmp = PyObject_Str(format_spec);
3609 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003610 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003611 format_spec = tmp;
3612
3613 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003614 PyString_AS_STRING(format_spec),
3615 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003616done:
3617 Py_XDECREF(tmp);
3618 return result;
3619}
3620
Christian Heimes44720832008-05-26 13:01:01 +00003621PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003622"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003623\n\
Eric Smith6c840852010-11-06 19:43:44 +00003624Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003625
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003626
Christian Heimes1a6387e2008-03-26 12:49:49 +00003627static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003628string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003629 /* Counterparts of the obsolete stropmodule functions; except
3630 string.maketrans(). */
3631 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3632 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3633 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3634 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3635 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3636 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3637 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3638 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3639 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3640 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3641 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3642 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3643 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3644 capitalize__doc__},
3645 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3646 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3647 endswith__doc__},
3648 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3649 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3650 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3651 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3652 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3653 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3654 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3655 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3656 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3657 rpartition__doc__},
3658 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3659 startswith__doc__},
3660 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3661 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3662 swapcase__doc__},
3663 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3664 translate__doc__},
3665 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3666 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3667 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3668 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3669 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3670 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3671 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3672 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3673 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3674 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3675 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3676 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3677 expandtabs__doc__},
3678 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3679 splitlines__doc__},
3680 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3681 sizeof__doc__},
3682 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3683 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003684};
3685
3686static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003687str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003688
Christian Heimes44720832008-05-26 13:01:01 +00003689static PyObject *
3690string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3691{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003692 PyObject *x = NULL;
3693 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003694
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003695 if (type != &PyString_Type)
3696 return str_subtype_new(type, args, kwds);
3697 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3698 return NULL;
3699 if (x == NULL)
3700 return PyString_FromString("");
3701 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003702}
3703
3704static PyObject *
3705str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3706{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003707 PyObject *tmp, *pnew;
3708 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003709
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003710 assert(PyType_IsSubtype(type, &PyString_Type));
3711 tmp = string_new(&PyString_Type, args, kwds);
3712 if (tmp == NULL)
3713 return NULL;
3714 assert(PyString_CheckExact(tmp));
3715 n = PyString_GET_SIZE(tmp);
3716 pnew = type->tp_alloc(type, n);
3717 if (pnew != NULL) {
3718 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3719 ((PyStringObject *)pnew)->ob_shash =
3720 ((PyStringObject *)tmp)->ob_shash;
3721 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3722 }
3723 Py_DECREF(tmp);
3724 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003725}
3726
3727static PyObject *
3728basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3729{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003730 PyErr_SetString(PyExc_TypeError,
3731 "The basestring type cannot be instantiated");
3732 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003733}
3734
3735static PyObject *
3736string_mod(PyObject *v, PyObject *w)
3737{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003738 if (!PyString_Check(v)) {
3739 Py_INCREF(Py_NotImplemented);
3740 return Py_NotImplemented;
3741 }
3742 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003743}
3744
3745PyDoc_STRVAR(basestring_doc,
3746"Type basestring cannot be instantiated; it is the base for str and unicode.");
3747
3748static PyNumberMethods string_as_number = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003749 0, /*nb_add*/
3750 0, /*nb_subtract*/
3751 0, /*nb_multiply*/
3752 0, /*nb_divide*/
3753 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00003754};
3755
3756
3757PyTypeObject PyBaseString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003758 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3759 "basestring",
3760 0,
3761 0,
3762 0, /* tp_dealloc */
3763 0, /* tp_print */
3764 0, /* tp_getattr */
3765 0, /* tp_setattr */
3766 0, /* tp_compare */
3767 0, /* tp_repr */
3768 0, /* tp_as_number */
3769 0, /* tp_as_sequence */
3770 0, /* tp_as_mapping */
3771 0, /* tp_hash */
3772 0, /* tp_call */
3773 0, /* tp_str */
3774 0, /* tp_getattro */
3775 0, /* tp_setattro */
3776 0, /* tp_as_buffer */
3777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3778 basestring_doc, /* tp_doc */
3779 0, /* tp_traverse */
3780 0, /* tp_clear */
3781 0, /* tp_richcompare */
3782 0, /* tp_weaklistoffset */
3783 0, /* tp_iter */
3784 0, /* tp_iternext */
3785 0, /* tp_methods */
3786 0, /* tp_members */
3787 0, /* tp_getset */
3788 &PyBaseObject_Type, /* tp_base */
3789 0, /* tp_dict */
3790 0, /* tp_descr_get */
3791 0, /* tp_descr_set */
3792 0, /* tp_dictoffset */
3793 0, /* tp_init */
3794 0, /* tp_alloc */
3795 basestring_new, /* tp_new */
3796 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003797};
3798
3799PyDoc_STRVAR(string_doc,
3800"str(object) -> string\n\
3801\n\
3802Return a nice string representation of the object.\n\
3803If the argument is a string, the return value is the same object.");
3804
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003805PyTypeObject PyString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003806 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3807 "str",
3808 PyStringObject_SIZE,
3809 sizeof(char),
3810 string_dealloc, /* tp_dealloc */
3811 (printfunc)string_print, /* tp_print */
3812 0, /* tp_getattr */
3813 0, /* tp_setattr */
3814 0, /* tp_compare */
3815 string_repr, /* tp_repr */
3816 &string_as_number, /* tp_as_number */
3817 &string_as_sequence, /* tp_as_sequence */
3818 &string_as_mapping, /* tp_as_mapping */
3819 (hashfunc)string_hash, /* tp_hash */
3820 0, /* tp_call */
3821 string_str, /* tp_str */
3822 PyObject_GenericGetAttr, /* tp_getattro */
3823 0, /* tp_setattro */
3824 &string_as_buffer, /* tp_as_buffer */
3825 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3826 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3827 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3828 string_doc, /* tp_doc */
3829 0, /* tp_traverse */
3830 0, /* tp_clear */
3831 (richcmpfunc)string_richcompare, /* tp_richcompare */
3832 0, /* tp_weaklistoffset */
3833 0, /* tp_iter */
3834 0, /* tp_iternext */
3835 string_methods, /* tp_methods */
3836 0, /* tp_members */
3837 0, /* tp_getset */
3838 &PyBaseString_Type, /* tp_base */
3839 0, /* tp_dict */
3840 0, /* tp_descr_get */
3841 0, /* tp_descr_set */
3842 0, /* tp_dictoffset */
3843 0, /* tp_init */
3844 0, /* tp_alloc */
3845 string_new, /* tp_new */
3846 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003847};
3848
3849void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003850PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003851{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003852 register PyObject *v;
3853 if (*pv == NULL)
3854 return;
3855 if (w == NULL || !PyString_Check(*pv)) {
3856 Py_DECREF(*pv);
3857 *pv = NULL;
3858 return;
3859 }
3860 v = string_concat((PyStringObject *) *pv, w);
3861 Py_DECREF(*pv);
3862 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003863}
3864
3865void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003866PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003867{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003868 PyString_Concat(pv, w);
3869 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003870}
3871
3872
3873/* The following function breaks the notion that strings are immutable:
3874 it changes the size of a string. We get away with this only if there
3875 is only one module referencing the object. You can also think of it
3876 as creating a new string object and destroying the old one, only
3877 more efficiently. In any case, don't use this if the string may
3878 already be known to some other part of the code...
3879 Note that if there's not enough memory to resize the string, the original
3880 string object at *pv is deallocated, *pv is set to NULL, an "out of
3881 memory" exception is set, and -1 is returned. Else (on success) 0 is
3882 returned, and the value in *pv may or may not be the same as on input.
3883 As always, an extra byte is allocated for a trailing \0 byte (newsize
3884 does *not* include that), and a trailing \0 byte is stored.
3885*/
3886
3887int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003888_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003889{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003890 register PyObject *v;
3891 register PyStringObject *sv;
3892 v = *pv;
3893 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3894 PyString_CHECK_INTERNED(v)) {
3895 *pv = 0;
3896 Py_DECREF(v);
3897 PyErr_BadInternalCall();
3898 return -1;
3899 }
3900 /* XXX UNREF/NEWREF interface should be more symmetrical */
3901 _Py_DEC_REFTOTAL;
3902 _Py_ForgetReference(v);
3903 *pv = (PyObject *)
3904 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3905 if (*pv == NULL) {
3906 PyObject_Del(v);
3907 PyErr_NoMemory();
3908 return -1;
3909 }
3910 _Py_NewReference(*pv);
3911 sv = (PyStringObject *) *pv;
3912 Py_SIZE(sv) = newsize;
3913 sv->ob_sval[newsize] = '\0';
3914 sv->ob_shash = -1; /* invalidate cached hash value */
3915 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003916}
3917
3918/* Helpers for formatstring */
3919
3920Py_LOCAL_INLINE(PyObject *)
3921getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3922{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003923 Py_ssize_t argidx = *p_argidx;
3924 if (argidx < arglen) {
3925 (*p_argidx)++;
3926 if (arglen < 0)
3927 return args;
3928 else
3929 return PyTuple_GetItem(args, argidx);
3930 }
3931 PyErr_SetString(PyExc_TypeError,
3932 "not enough arguments for format string");
3933 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003934}
3935
/* Bit flags recording which flag characters appeared in a '%' format
   specifier.  Each macro is annotated with the character that sets it. */
#define F_LJUST (1 << 0)        /* '-' */
#define F_SIGN  (1 << 1)        /* '+' */
#define F_BLANK (1 << 2)        /* ' ' */
#define F_ALT   (1 << 3)        /* '#' */
#define F_ZERO  (1 << 4)        /* '0' */
Christian Heimes44720832008-05-26 13:01:01 +00003948
Mark Dickinson18cfada2009-11-23 18:46:41 +00003949/* Returns a new reference to a PyString object, or NULL on failure. */
3950
3951static PyObject *
3952formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003953{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003954 char *p;
3955 PyObject *result;
3956 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003957
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003958 x = PyFloat_AsDouble(v);
3959 if (x == -1.0 && PyErr_Occurred()) {
3960 PyErr_Format(PyExc_TypeError, "float argument required, "
3961 "not %.200s", Py_TYPE(v)->tp_name);
3962 return NULL;
3963 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003964
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003965 if (prec < 0)
3966 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003967
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003968 p = PyOS_double_to_string(x, type, prec,
3969 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003970
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003971 if (p == NULL)
3972 return NULL;
3973 result = PyString_FromStringAndSize(p, strlen(p));
3974 PyMem_Free(p);
3975 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003976}
3977
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003978/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003979 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3980 * Python's regular ints.
3981 * Return value: a new PyString*, or NULL if error.
3982 * . *pbuf is set to point into it,
3983 * *plen set to the # of chars following that.
3984 * Caller must decref it when done using pbuf.
3985 * The string starting at *pbuf is of the form
3986 * "-"? ("0x" | "0X")? digit+
3987 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3988 * set in flags. The case of hex digits will be correct,
3989 * There will be at least prec digits, zero-filled on the left if
3990 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003991 * val object to be converted
3992 * flags bitmask of format flags; only F_ALT is looked at
3993 * prec minimum number of digits; 0-fill on left if needed
3994 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003995 *
3996 * CAUTION: o, x and X conversions on regular ints can never
3997 * produce a '-' sign, but can for Python's unbounded ints.
3998 */
3999PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004000_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004001 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00004002{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004003 PyObject *result = NULL;
4004 char *buf;
4005 Py_ssize_t i;
4006 int sign; /* 1 if '-', else 0 */
4007 int len; /* number of characters */
4008 Py_ssize_t llen;
4009 int numdigits; /* len == numnondigits + numdigits */
4010 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004011
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004012 switch (type) {
4013 case 'd':
4014 case 'u':
4015 result = Py_TYPE(val)->tp_str(val);
4016 break;
4017 case 'o':
4018 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4019 break;
4020 case 'x':
4021 case 'X':
4022 numnondigits = 2;
4023 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4024 break;
4025 default:
4026 assert(!"'type' not in [duoxX]");
4027 }
4028 if (!result)
4029 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004030
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004031 buf = PyString_AsString(result);
4032 if (!buf) {
4033 Py_DECREF(result);
4034 return NULL;
4035 }
Christian Heimes44720832008-05-26 13:01:01 +00004036
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004037 /* To modify the string in-place, there can only be one reference. */
4038 if (Py_REFCNT(result) != 1) {
4039 PyErr_BadInternalCall();
4040 return NULL;
4041 }
4042 llen = PyString_Size(result);
4043 if (llen > INT_MAX) {
4044 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4045 return NULL;
4046 }
4047 len = (int)llen;
4048 if (buf[len-1] == 'L') {
4049 --len;
4050 buf[len] = '\0';
4051 }
4052 sign = buf[0] == '-';
4053 numnondigits += sign;
4054 numdigits = len - numnondigits;
4055 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004056
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004057 /* Get rid of base marker unless F_ALT */
4058 if ((flags & F_ALT) == 0) {
4059 /* Need to skip 0x, 0X or 0. */
4060 int skipped = 0;
4061 switch (type) {
4062 case 'o':
4063 assert(buf[sign] == '0');
4064 /* If 0 is only digit, leave it alone. */
4065 if (numdigits > 1) {
4066 skipped = 1;
4067 --numdigits;
4068 }
4069 break;
4070 case 'x':
4071 case 'X':
4072 assert(buf[sign] == '0');
4073 assert(buf[sign + 1] == 'x');
4074 skipped = 2;
4075 numnondigits -= 2;
4076 break;
4077 }
4078 if (skipped) {
4079 buf += skipped;
4080 len -= skipped;
4081 if (sign)
4082 buf[0] = '-';
4083 }
4084 assert(len == numnondigits + numdigits);
4085 assert(numdigits > 0);
4086 }
Christian Heimes44720832008-05-26 13:01:01 +00004087
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004088 /* Fill with leading zeroes to meet minimum width. */
4089 if (prec > numdigits) {
4090 PyObject *r1 = PyString_FromStringAndSize(NULL,
4091 numnondigits + prec);
4092 char *b1;
4093 if (!r1) {
4094 Py_DECREF(result);
4095 return NULL;
4096 }
4097 b1 = PyString_AS_STRING(r1);
4098 for (i = 0; i < numnondigits; ++i)
4099 *b1++ = *buf++;
4100 for (i = 0; i < prec - numdigits; i++)
4101 *b1++ = '0';
4102 for (i = 0; i < numdigits; i++)
4103 *b1++ = *buf++;
4104 *b1 = '\0';
4105 Py_DECREF(result);
4106 result = r1;
4107 buf = PyString_AS_STRING(result);
4108 len = numnondigits + prec;
4109 }
Christian Heimes44720832008-05-26 13:01:01 +00004110
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004111 /* Fix up case for hex conversions. */
4112 if (type == 'X') {
4113 /* Need to convert all lower case letters to upper case.
4114 and need to convert 0x to 0X (and -0x to -0X). */
4115 for (i = 0; i < len; i++)
4116 if (buf[i] >= 'a' && buf[i] <= 'x')
4117 buf[i] -= 'a'-'A';
4118 }
4119 *pbuf = buf;
4120 *plen = len;
4121 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004122}
4123
4124Py_LOCAL_INLINE(int)
4125formatint(char *buf, size_t buflen, int flags,
4126 int prec, int type, PyObject *v)
4127{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004128 /* fmt = '%#.' + `prec` + 'l' + `type`
4129 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4130 + 1 + 1 = 24 */
4131 char fmt[64]; /* plenty big enough! */
4132 char *sign;
4133 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004134
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004135 x = PyInt_AsLong(v);
4136 if (x == -1 && PyErr_Occurred()) {
4137 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4138 Py_TYPE(v)->tp_name);
4139 return -1;
4140 }
4141 if (x < 0 && type == 'u') {
4142 type = 'd';
4143 }
4144 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4145 sign = "-";
4146 else
4147 sign = "";
4148 if (prec < 0)
4149 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004150
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004151 if ((flags & F_ALT) &&
4152 (type == 'x' || type == 'X')) {
4153 /* When converting under %#x or %#X, there are a number
4154 * of issues that cause pain:
4155 * - when 0 is being converted, the C standard leaves off
4156 * the '0x' or '0X', which is inconsistent with other
4157 * %#x/%#X conversions and inconsistent with Python's
4158 * hex() function
4159 * - there are platforms that violate the standard and
4160 * convert 0 with the '0x' or '0X'
4161 * (Metrowerks, Compaq Tru64)
4162 * - there are platforms that give '0x' when converting
4163 * under %#X, but convert 0 in accordance with the
4164 * standard (OS/2 EMX)
4165 *
4166 * We can achieve the desired consistency by inserting our
4167 * own '0x' or '0X' prefix, and substituting %x/%X in place
4168 * of %#x/%#X.
4169 *
4170 * Note that this is the same approach as used in
4171 * formatint() in unicodeobject.c
4172 */
4173 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4174 sign, type, prec, type);
4175 }
4176 else {
4177 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4178 sign, (flags&F_ALT) ? "#" : "",
4179 prec, type);
4180 }
Christian Heimes44720832008-05-26 13:01:01 +00004181
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004182 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4183 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4184 */
4185 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4186 PyErr_SetString(PyExc_OverflowError,
4187 "formatted integer is too long (precision too large?)");
4188 return -1;
4189 }
4190 if (sign[0])
4191 PyOS_snprintf(buf, buflen, fmt, -x);
4192 else
4193 PyOS_snprintf(buf, buflen, fmt, x);
4194 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004195}
4196
4197Py_LOCAL_INLINE(int)
4198formatchar(char *buf, size_t buflen, PyObject *v)
4199{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004200 /* presume that the buffer is at least 2 characters long */
4201 if (PyString_Check(v)) {
4202 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4203 return -1;
4204 }
4205 else {
4206 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4207 return -1;
4208 }
4209 buf[1] = '\0';
4210 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004211}
4212
4213/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4214
Mark Dickinson18cfada2009-11-23 18:46:41 +00004215 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004216 chars are formatted. XXX This is a magic number. Each formatting
4217 routine does bounds checking to ensure no overflow, but a better
4218 solution may be to malloc a buffer of appropriate size for each
4219 format. For now, the current solution is sufficient.
4220*/
4221#define FORMATBUFLEN (size_t)120
4222
4223PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004224PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004225{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004226 char *fmt, *res;
4227 Py_ssize_t arglen, argidx;
4228 Py_ssize_t reslen, rescnt, fmtcnt;
4229 int args_owned = 0;
4230 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004231#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004232 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004233#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004234 PyObject *dict = NULL;
4235 if (format == NULL || !PyString_Check(format) || args == NULL) {
4236 PyErr_BadInternalCall();
4237 return NULL;
4238 }
4239 orig_args = args;
4240 fmt = PyString_AS_STRING(format);
4241 fmtcnt = PyString_GET_SIZE(format);
4242 reslen = rescnt = fmtcnt + 100;
4243 result = PyString_FromStringAndSize((char *)NULL, reslen);
4244 if (result == NULL)
4245 return NULL;
4246 res = PyString_AsString(result);
4247 if (PyTuple_Check(args)) {
4248 arglen = PyTuple_GET_SIZE(args);
4249 argidx = 0;
4250 }
4251 else {
4252 arglen = -1;
4253 argidx = -2;
4254 }
4255 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4256 !PyObject_TypeCheck(args, &PyBaseString_Type))
4257 dict = args;
4258 while (--fmtcnt >= 0) {
4259 if (*fmt != '%') {
4260 if (--rescnt < 0) {
4261 rescnt = fmtcnt + 100;
4262 reslen += rescnt;
4263 if (_PyString_Resize(&result, reslen))
4264 return NULL;
4265 res = PyString_AS_STRING(result)
4266 + reslen - rescnt;
4267 --rescnt;
4268 }
4269 *res++ = *fmt++;
4270 }
4271 else {
4272 /* Got a format specifier */
4273 int flags = 0;
4274 Py_ssize_t width = -1;
4275 int prec = -1;
4276 int c = '\0';
4277 int fill;
4278 int isnumok;
4279 PyObject *v = NULL;
4280 PyObject *temp = NULL;
4281 char *pbuf;
4282 int sign;
4283 Py_ssize_t len;
4284 char formatbuf[FORMATBUFLEN];
4285 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004286#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004287 char *fmt_start = fmt;
4288 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004289#endif
4290
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004291 fmt++;
4292 if (*fmt == '(') {
4293 char *keystart;
4294 Py_ssize_t keylen;
4295 PyObject *key;
4296 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004297
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004298 if (dict == NULL) {
4299 PyErr_SetString(PyExc_TypeError,
4300 "format requires a mapping");
4301 goto error;
4302 }
4303 ++fmt;
4304 --fmtcnt;
4305 keystart = fmt;
4306 /* Skip over balanced parentheses */
4307 while (pcount > 0 && --fmtcnt >= 0) {
4308 if (*fmt == ')')
4309 --pcount;
4310 else if (*fmt == '(')
4311 ++pcount;
4312 fmt++;
4313 }
4314 keylen = fmt - keystart - 1;
4315 if (fmtcnt < 0 || pcount > 0) {
4316 PyErr_SetString(PyExc_ValueError,
4317 "incomplete format key");
4318 goto error;
4319 }
4320 key = PyString_FromStringAndSize(keystart,
4321 keylen);
4322 if (key == NULL)
4323 goto error;
4324 if (args_owned) {
4325 Py_DECREF(args);
4326 args_owned = 0;
4327 }
4328 args = PyObject_GetItem(dict, key);
4329 Py_DECREF(key);
4330 if (args == NULL) {
4331 goto error;
4332 }
4333 args_owned = 1;
4334 arglen = -1;
4335 argidx = -2;
4336 }
4337 while (--fmtcnt >= 0) {
4338 switch (c = *fmt++) {
4339 case '-': flags |= F_LJUST; continue;
4340 case '+': flags |= F_SIGN; continue;
4341 case ' ': flags |= F_BLANK; continue;
4342 case '#': flags |= F_ALT; continue;
4343 case '0': flags |= F_ZERO; continue;
4344 }
4345 break;
4346 }
4347 if (c == '*') {
4348 v = getnextarg(args, arglen, &argidx);
4349 if (v == NULL)
4350 goto error;
4351 if (!PyInt_Check(v)) {
4352 PyErr_SetString(PyExc_TypeError,
4353 "* wants int");
4354 goto error;
4355 }
4356 width = PyInt_AsLong(v);
4357 if (width < 0) {
4358 flags |= F_LJUST;
4359 width = -width;
4360 }
4361 if (--fmtcnt >= 0)
4362 c = *fmt++;
4363 }
4364 else if (c >= 0 && isdigit(c)) {
4365 width = c - '0';
4366 while (--fmtcnt >= 0) {
4367 c = Py_CHARMASK(*fmt++);
4368 if (!isdigit(c))
4369 break;
4370 if ((width*10) / 10 != width) {
4371 PyErr_SetString(
4372 PyExc_ValueError,
4373 "width too big");
4374 goto error;
4375 }
4376 width = width*10 + (c - '0');
4377 }
4378 }
4379 if (c == '.') {
4380 prec = 0;
4381 if (--fmtcnt >= 0)
4382 c = *fmt++;
4383 if (c == '*') {
4384 v = getnextarg(args, arglen, &argidx);
4385 if (v == NULL)
4386 goto error;
4387 if (!PyInt_Check(v)) {
4388 PyErr_SetString(
4389 PyExc_TypeError,
4390 "* wants int");
4391 goto error;
4392 }
4393 prec = PyInt_AsLong(v);
4394 if (prec < 0)
4395 prec = 0;
4396 if (--fmtcnt >= 0)
4397 c = *fmt++;
4398 }
4399 else if (c >= 0 && isdigit(c)) {
4400 prec = c - '0';
4401 while (--fmtcnt >= 0) {
4402 c = Py_CHARMASK(*fmt++);
4403 if (!isdigit(c))
4404 break;
4405 if ((prec*10) / 10 != prec) {
4406 PyErr_SetString(
4407 PyExc_ValueError,
4408 "prec too big");
4409 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004410 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004411 prec = prec*10 + (c - '0');
4412 }
4413 }
4414 } /* prec */
4415 if (fmtcnt >= 0) {
4416 if (c == 'h' || c == 'l' || c == 'L') {
4417 if (--fmtcnt >= 0)
4418 c = *fmt++;
4419 }
4420 }
4421 if (fmtcnt < 0) {
4422 PyErr_SetString(PyExc_ValueError,
4423 "incomplete format");
4424 goto error;
4425 }
4426 if (c != '%') {
4427 v = getnextarg(args, arglen, &argidx);
4428 if (v == NULL)
4429 goto error;
4430 }
4431 sign = 0;
4432 fill = ' ';
4433 switch (c) {
4434 case '%':
4435 pbuf = "%";
4436 len = 1;
4437 break;
4438 case 's':
4439#ifdef Py_USING_UNICODE
4440 if (PyUnicode_Check(v)) {
4441 fmt = fmt_start;
4442 argidx = argidx_start;
4443 goto unicode;
4444 }
4445#endif
4446 temp = _PyObject_Str(v);
4447#ifdef Py_USING_UNICODE
4448 if (temp != NULL && PyUnicode_Check(temp)) {
4449 Py_DECREF(temp);
4450 fmt = fmt_start;
4451 argidx = argidx_start;
4452 goto unicode;
4453 }
4454#endif
4455 /* Fall through */
4456 case 'r':
4457 if (c == 'r')
4458 temp = PyObject_Repr(v);
4459 if (temp == NULL)
4460 goto error;
4461 if (!PyString_Check(temp)) {
4462 PyErr_SetString(PyExc_TypeError,
4463 "%s argument has non-string str()");
4464 Py_DECREF(temp);
4465 goto error;
4466 }
4467 pbuf = PyString_AS_STRING(temp);
4468 len = PyString_GET_SIZE(temp);
4469 if (prec >= 0 && len > prec)
4470 len = prec;
4471 break;
4472 case 'i':
4473 case 'd':
4474 case 'u':
4475 case 'o':
4476 case 'x':
4477 case 'X':
4478 if (c == 'i')
4479 c = 'd';
4480 isnumok = 0;
4481 if (PyNumber_Check(v)) {
4482 PyObject *iobj=NULL;
4483
4484 if (PyInt_Check(v) || (PyLong_Check(v))) {
4485 iobj = v;
4486 Py_INCREF(iobj);
4487 }
4488 else {
4489 iobj = PyNumber_Int(v);
4490 if (iobj==NULL) iobj = PyNumber_Long(v);
4491 }
4492 if (iobj!=NULL) {
4493 if (PyInt_Check(iobj)) {
4494 isnumok = 1;
4495 pbuf = formatbuf;
4496 len = formatint(pbuf,
4497 sizeof(formatbuf),
4498 flags, prec, c, iobj);
4499 Py_DECREF(iobj);
4500 if (len < 0)
4501 goto error;
4502 sign = 1;
4503 }
4504 else if (PyLong_Check(iobj)) {
4505 int ilen;
4506
4507 isnumok = 1;
4508 temp = _PyString_FormatLong(iobj, flags,
4509 prec, c, &pbuf, &ilen);
4510 Py_DECREF(iobj);
4511 len = ilen;
4512 if (!temp)
4513 goto error;
4514 sign = 1;
4515 }
4516 else {
4517 Py_DECREF(iobj);
4518 }
4519 }
4520 }
4521 if (!isnumok) {
4522 PyErr_Format(PyExc_TypeError,
4523 "%%%c format: a number is required, "
4524 "not %.200s", c, Py_TYPE(v)->tp_name);
4525 goto error;
4526 }
4527 if (flags & F_ZERO)
4528 fill = '0';
4529 break;
4530 case 'e':
4531 case 'E':
4532 case 'f':
4533 case 'F':
4534 case 'g':
4535 case 'G':
4536 temp = formatfloat(v, flags, prec, c);
4537 if (temp == NULL)
4538 goto error;
4539 pbuf = PyString_AS_STRING(temp);
4540 len = PyString_GET_SIZE(temp);
4541 sign = 1;
4542 if (flags & F_ZERO)
4543 fill = '0';
4544 break;
4545 case 'c':
4546#ifdef Py_USING_UNICODE
4547 if (PyUnicode_Check(v)) {
4548 fmt = fmt_start;
4549 argidx = argidx_start;
4550 goto unicode;
4551 }
4552#endif
4553 pbuf = formatbuf;
4554 len = formatchar(pbuf, sizeof(formatbuf), v);
4555 if (len < 0)
4556 goto error;
4557 break;
4558 default:
4559 PyErr_Format(PyExc_ValueError,
4560 "unsupported format character '%c' (0x%x) "
4561 "at index %zd",
4562 c, c,
4563 (Py_ssize_t)(fmt - 1 -
4564 PyString_AsString(format)));
4565 goto error;
4566 }
4567 if (sign) {
4568 if (*pbuf == '-' || *pbuf == '+') {
4569 sign = *pbuf++;
4570 len--;
4571 }
4572 else if (flags & F_SIGN)
4573 sign = '+';
4574 else if (flags & F_BLANK)
4575 sign = ' ';
4576 else
4577 sign = 0;
4578 }
4579 if (width < len)
4580 width = len;
4581 if (rescnt - (sign != 0) < width) {
4582 reslen -= rescnt;
4583 rescnt = width + fmtcnt + 100;
4584 reslen += rescnt;
4585 if (reslen < 0) {
4586 Py_DECREF(result);
4587 Py_XDECREF(temp);
4588 return PyErr_NoMemory();
4589 }
4590 if (_PyString_Resize(&result, reslen)) {
4591 Py_XDECREF(temp);
4592 return NULL;
4593 }
4594 res = PyString_AS_STRING(result)
4595 + reslen - rescnt;
4596 }
4597 if (sign) {
4598 if (fill != ' ')
4599 *res++ = sign;
4600 rescnt--;
4601 if (width > len)
4602 width--;
4603 }
4604 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4605 assert(pbuf[0] == '0');
4606 assert(pbuf[1] == c);
4607 if (fill != ' ') {
4608 *res++ = *pbuf++;
4609 *res++ = *pbuf++;
4610 }
4611 rescnt -= 2;
4612 width -= 2;
4613 if (width < 0)
4614 width = 0;
4615 len -= 2;
4616 }
4617 if (width > len && !(flags & F_LJUST)) {
4618 do {
4619 --rescnt;
4620 *res++ = fill;
4621 } while (--width > len);
4622 }
4623 if (fill == ' ') {
4624 if (sign)
4625 *res++ = sign;
4626 if ((flags & F_ALT) &&
4627 (c == 'x' || c == 'X')) {
4628 assert(pbuf[0] == '0');
4629 assert(pbuf[1] == c);
4630 *res++ = *pbuf++;
4631 *res++ = *pbuf++;
4632 }
4633 }
4634 Py_MEMCPY(res, pbuf, len);
4635 res += len;
4636 rescnt -= len;
4637 while (--width >= len) {
4638 --rescnt;
4639 *res++ = ' ';
4640 }
4641 if (dict && (argidx < arglen) && c != '%') {
4642 PyErr_SetString(PyExc_TypeError,
4643 "not all arguments converted during string formatting");
4644 Py_XDECREF(temp);
4645 goto error;
4646 }
4647 Py_XDECREF(temp);
4648 } /* '%' */
4649 } /* until end */
4650 if (argidx < arglen && !dict) {
4651 PyErr_SetString(PyExc_TypeError,
4652 "not all arguments converted during string formatting");
4653 goto error;
4654 }
4655 if (args_owned) {
4656 Py_DECREF(args);
4657 }
4658 if (_PyString_Resize(&result, reslen - rescnt))
4659 return NULL;
4660 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004661
4662#ifdef Py_USING_UNICODE
4663 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004664 if (args_owned) {
4665 Py_DECREF(args);
4666 args_owned = 0;
4667 }
4668 /* Fiddle args right (remove the first argidx arguments) */
4669 if (PyTuple_Check(orig_args) && argidx > 0) {
4670 PyObject *v;
4671 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4672 v = PyTuple_New(n);
4673 if (v == NULL)
4674 goto error;
4675 while (--n >= 0) {
4676 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4677 Py_INCREF(w);
4678 PyTuple_SET_ITEM(v, n, w);
4679 }
4680 args = v;
4681 } else {
4682 Py_INCREF(orig_args);
4683 args = orig_args;
4684 }
4685 args_owned = 1;
4686 /* Take what we have of the result and let the Unicode formatting
4687 function format the rest of the input. */
4688 rescnt = res - PyString_AS_STRING(result);
4689 if (_PyString_Resize(&result, rescnt))
4690 goto error;
4691 fmtcnt = PyString_GET_SIZE(format) - \
4692 (fmt - PyString_AS_STRING(format));
4693 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4694 if (format == NULL)
4695 goto error;
4696 v = PyUnicode_Format(format, args);
4697 Py_DECREF(format);
4698 if (v == NULL)
4699 goto error;
4700 /* Paste what we have (result) to what the Unicode formatting
4701 function returned (v) and return the result (or error) */
4702 w = PyUnicode_Concat(result, v);
4703 Py_DECREF(result);
4704 Py_DECREF(v);
4705 Py_DECREF(args);
4706 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004707#endif /* Py_USING_UNICODE */
4708
4709 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004710 Py_DECREF(result);
4711 if (args_owned) {
4712 Py_DECREF(args);
4713 }
4714 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004715}
4716
4717void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004718PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004719{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004720 register PyStringObject *s = (PyStringObject *)(*p);
4721 PyObject *t;
4722 if (s == NULL || !PyString_Check(s))
4723 Py_FatalError("PyString_InternInPlace: strings only please!");
4724 /* If it's a string subclass, we don't really know what putting
4725 it in the interned dict might do. */
4726 if (!PyString_CheckExact(s))
4727 return;
4728 if (PyString_CHECK_INTERNED(s))
4729 return;
4730 if (interned == NULL) {
4731 interned = PyDict_New();
4732 if (interned == NULL) {
4733 PyErr_Clear(); /* Don't leave an exception */
4734 return;
4735 }
4736 }
4737 t = PyDict_GetItem(interned, (PyObject *)s);
4738 if (t) {
4739 Py_INCREF(t);
4740 Py_DECREF(*p);
4741 *p = t;
4742 return;
4743 }
Christian Heimes44720832008-05-26 13:01:01 +00004744
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004745 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4746 PyErr_Clear();
4747 return;
4748 }
4749 /* The two references in interned are not counted by refcnt.
4750 The string deallocator will take care of this */
4751 Py_REFCNT(s) -= 2;
4752 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004753}
4754
4755void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004756PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004757{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004758 PyString_InternInPlace(p);
4759 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4760 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4761 Py_INCREF(*p);
4762 }
Christian Heimes44720832008-05-26 13:01:01 +00004763}
4764
4765
4766PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004767PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004768{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004769 PyObject *s = PyString_FromString(cp);
4770 if (s == NULL)
4771 return NULL;
4772 PyString_InternInPlace(&s);
4773 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004774}
4775
4776void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004777PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004778{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004779 int i;
4780 for (i = 0; i < UCHAR_MAX + 1; i++) {
4781 Py_XDECREF(characters[i]);
4782 characters[i] = NULL;
4783 }
4784 Py_XDECREF(nullstring);
4785 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004786}
4787
4788void _Py_ReleaseInternedStrings(void)
4789{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004790 PyObject *keys;
4791 PyStringObject *s;
4792 Py_ssize_t i, n;
4793 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004794
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004795 if (interned == NULL || !PyDict_Check(interned))
4796 return;
4797 keys = PyDict_Keys(interned);
4798 if (keys == NULL || !PyList_Check(keys)) {
4799 PyErr_Clear();
4800 return;
4801 }
Christian Heimes44720832008-05-26 13:01:01 +00004802
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004803 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4804 detector, interned strings are not forcibly deallocated; rather, we
4805 give them their stolen references back, and then clear and DECREF
4806 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004807
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004808 n = PyList_GET_SIZE(keys);
4809 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4810 n);
4811 for (i = 0; i < n; i++) {
4812 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4813 switch (s->ob_sstate) {
4814 case SSTATE_NOT_INTERNED:
4815 /* XXX Shouldn't happen */
4816 break;
4817 case SSTATE_INTERNED_IMMORTAL:
4818 Py_REFCNT(s) += 1;
4819 immortal_size += Py_SIZE(s);
4820 break;
4821 case SSTATE_INTERNED_MORTAL:
4822 Py_REFCNT(s) += 2;
4823 mortal_size += Py_SIZE(s);
4824 break;
4825 default:
4826 Py_FatalError("Inconsistent interned string state.");
4827 }
4828 s->ob_sstate = SSTATE_NOT_INTERNED;
4829 }
4830 fprintf(stderr, "total size of all interned strings: "
4831 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4832 "mortal/immortal\n", mortal_size, immortal_size);
4833 Py_DECREF(keys);
4834 PyDict_Clear(interned);
4835 Py_DECREF(interned);
4836 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004837}