blob: 152ea215f37ef4766d51fbf2bf4091caadda5a1e [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
/* PyStringObject_SIZE is the basic size of a string object: everything up
   to the start of ob_sval, plus one byte.  Any memory allocation for a
   string of length n should request PyStringObject_SIZE + n bytes.

   Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyStringObject_SIZE (1 + offsetof(PyStringObject, ob_sval))
33
/*
   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000624#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000635
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
Christian Heimes44720832008-05-26 13:01:01 +0000653#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000654 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000655#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
Christian Heimes44720832008-05-26 13:01:01 +0000729#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000730 case 'u':
731 case 'U':
732 case 'N':
733 if (unicode) {
734 PyErr_SetString(PyExc_ValueError,
735 "Unicode escapes not legal "
736 "when Unicode disabled");
737 goto failed;
738 }
Christian Heimes44720832008-05-26 13:01:01 +0000739#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000740 default:
741 *p++ = '\\';
742 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200743 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 UTF-8 bytes may follow. */
745 }
746 }
747 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
748 goto failed;
749 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000750 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000751 Py_DECREF(v);
752 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000753}
754
755/* -------------------------------------------------------------------- */
756/* object api */
757
Christian Heimes1a6387e2008-03-26 12:49:49 +0000758static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000759string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000760{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000761 char *s;
762 Py_ssize_t len;
763 if (PyString_AsStringAndSize(op, &s, &len))
764 return -1;
765 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000766}
767
Christian Heimes44720832008-05-26 13:01:01 +0000768static /*const*/ char *
769string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000771 char *s;
772 Py_ssize_t len;
773 if (PyString_AsStringAndSize(op, &s, &len))
774 return NULL;
775 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000776}
777
778Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000779PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000781 if (!PyString_Check(op))
782 return string_getsize(op);
783 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784}
785
Christian Heimes44720832008-05-26 13:01:01 +0000786/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000787PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000789 if (!PyString_Check(op))
790 return string_getbuffer(op);
791 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792}
793
794int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000796 register char **s,
797 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000798{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000799 if (s == NULL) {
800 PyErr_BadInternalCall();
801 return -1;
802 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000803
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000804 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000805#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 if (PyUnicode_Check(obj)) {
807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
808 if (obj == NULL)
809 return -1;
810 }
811 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000812#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000813 {
814 PyErr_Format(PyExc_TypeError,
815 "expected string or Unicode object, "
816 "%.200s found", Py_TYPE(obj)->tp_name);
817 return -1;
818 }
819 }
Christian Heimes44720832008-05-26 13:01:01 +0000820
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000821 *s = PyString_AS_STRING(obj);
822 if (len != NULL)
823 *len = PyString_GET_SIZE(obj);
824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
825 PyErr_SetString(PyExc_TypeError,
826 "expected string without null bytes");
827 return -1;
828 }
829 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000830}
831
Christian Heimes1a6387e2008-03-26 12:49:49 +0000832/* -------------------------------------------------------------------- */
833/* Methods */
834
Christian Heimes44720832008-05-26 13:01:01 +0000835#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000837
Christian Heimes1a6387e2008-03-26 12:49:49 +0000838#include "stringlib/count.h"
839#include "stringlib/find.h"
840#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000841#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000843#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000844#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000845
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
847
848static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000849string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000851 Py_ssize_t i, str_len;
852 char c;
853 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 /* XXX Ought to check for interrupts when writing long strings */
856 if (! PyString_CheckExact(op)) {
857 int ret;
858 /* A str subclass may have its own __str__ method. */
859 op = (PyStringObject *) PyObject_Str((PyObject *)op);
860 if (op == NULL)
861 return -1;
862 ret = string_print(op, fp, flags);
863 Py_DECREF(op);
864 return ret;
865 }
866 if (flags & Py_PRINT_RAW) {
867 char *data = op->ob_sval;
868 Py_ssize_t size = Py_SIZE(op);
869 Py_BEGIN_ALLOW_THREADS
870 while (size > INT_MAX) {
871 /* Very long strings cannot be written atomically.
872 * But don't write exactly INT_MAX bytes at a time
873 * to avoid memory aligment issues.
874 */
875 const int chunk_size = INT_MAX & ~0x3FFF;
876 fwrite(data, 1, chunk_size, fp);
877 data += chunk_size;
878 size -= chunk_size;
879 }
Christian Heimes44720832008-05-26 13:01:01 +0000880#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000881 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000882#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000883 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000884#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000885 Py_END_ALLOW_THREADS
886 return 0;
887 }
Christian Heimes44720832008-05-26 13:01:01 +0000888
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 /* figure out which quote to use; single is preferred */
890 quote = '\'';
891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000894
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000895 str_len = Py_SIZE(op);
896 Py_BEGIN_ALLOW_THREADS
897 fputc(quote, fp);
898 for (i = 0; i < str_len; i++) {
899 /* Since strings are immutable and the caller should have a
900 reference, accessing the interal buffer should not be an issue
901 with the GIL released. */
902 c = op->ob_sval[i];
903 if (c == quote || c == '\\')
904 fprintf(fp, "\\%c", c);
905 else if (c == '\t')
906 fprintf(fp, "\\t");
907 else if (c == '\n')
908 fprintf(fp, "\\n");
909 else if (c == '\r')
910 fprintf(fp, "\\r");
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
913 else
914 fputc(c, fp);
915 }
916 fputc(quote, fp);
917 Py_END_ALLOW_THREADS
918 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000919}
920
Christian Heimes44720832008-05-26 13:01:01 +0000921PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000922PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000924 register PyStringObject* op = (PyStringObject*) obj;
925 size_t newsize = 2 + 4 * Py_SIZE(op);
926 PyObject *v;
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
928 PyErr_SetString(PyExc_OverflowError,
929 "string is too large to make repr");
930 return NULL;
931 }
932 v = PyString_FromStringAndSize((char *)NULL, newsize);
933 if (v == NULL) {
934 return NULL;
935 }
936 else {
937 register Py_ssize_t i;
938 register char c;
939 register char *p;
940 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000941
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000942 /* figure out which quote to use; single is preferred */
943 quote = '\'';
944 if (smartquotes &&
945 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
946 !memchr(op->ob_sval, '"', Py_SIZE(op)))
947 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000948
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000949 p = PyString_AS_STRING(v);
950 *p++ = quote;
951 for (i = 0; i < Py_SIZE(op); i++) {
952 /* There's at least enough room for a hex escape
953 and a closing quote. */
954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
955 c = op->ob_sval[i];
956 if (c == quote || c == '\\')
957 *p++ = '\\', *p++ = c;
958 else if (c == '\t')
959 *p++ = '\\', *p++ = 't';
960 else if (c == '\n')
961 *p++ = '\\', *p++ = 'n';
962 else if (c == '\r')
963 *p++ = '\\', *p++ = 'r';
964 else if (c < ' ' || c >= 0x7f) {
965 /* For performance, we don't want to call
966 PyOS_snprintf here (extra layers of
967 function call). */
968 sprintf(p, "\\x%02x", c & 0xff);
969 p += 4;
970 }
971 else
972 *p++ = c;
973 }
974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
975 *p++ = quote;
976 *p = '\0';
977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
978 return NULL;
979 return v;
980 }
Christian Heimes44720832008-05-26 13:01:01 +0000981}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000982
983static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000984string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000985{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000986 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000987}
988
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000990string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000992 assert(PyString_Check(s));
993 if (PyString_CheckExact(s)) {
994 Py_INCREF(s);
995 return s;
996 }
997 else {
998 /* Subtype -- return genuine string with the same value. */
999 PyStringObject *t = (PyStringObject *) s;
1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1001 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001002}
1003
Christian Heimes44720832008-05-26 13:01:01 +00001004static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001005string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001006{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001007 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001008}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001009
Christian Heimes44720832008-05-26 13:01:01 +00001010static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001011string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001012{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 register Py_ssize_t size;
1014 register PyStringObject *op;
1015 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001016#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 if (PyUnicode_Check(bb))
1018 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001019#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001020 if (PyByteArray_Check(bb))
1021 return PyByteArray_Concat((PyObject *)a, bb);
1022 PyErr_Format(PyExc_TypeError,
1023 "cannot concatenate 'str' and '%.200s' objects",
1024 Py_TYPE(bb)->tp_name);
1025 return NULL;
1026 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001027#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001028 /* Optimize cases with empty left or right operand */
1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1030 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1031 if (Py_SIZE(a) == 0) {
1032 Py_INCREF(bb);
1033 return bb;
1034 }
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 size = Py_SIZE(a) + Py_SIZE(b);
1039 /* Check that string sizes are not negative, to prevent an
1040 overflow in cases where we are passed incorrectly-created
1041 strings with negative lengths (due to a bug in other code).
1042 */
1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1045 PyErr_SetString(PyExc_OverflowError,
1046 "strings are too large to concat");
1047 return NULL;
1048 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001049
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001050 /* Inline PyObject_NewVar */
1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1052 PyErr_SetString(PyExc_OverflowError,
1053 "strings are too large to concat");
1054 return NULL;
1055 }
1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1057 if (op == NULL)
1058 return PyErr_NoMemory();
1059 PyObject_INIT_VAR(op, &PyString_Type, size);
1060 op->ob_shash = -1;
1061 op->ob_sstate = SSTATE_NOT_INTERNED;
1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1064 op->ob_sval[size] = '\0';
1065 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001066#undef b
1067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001070string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001071{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001072 register Py_ssize_t i;
1073 register Py_ssize_t j;
1074 register Py_ssize_t size;
1075 register PyStringObject *op;
1076 size_t nbytes;
1077 if (n < 0)
1078 n = 0;
1079 /* watch out for overflows: the size can overflow int,
1080 * and the # of bytes needed can overflow size_t
1081 */
1082 size = Py_SIZE(a) * n;
1083 if (n && size / n != Py_SIZE(a)) {
1084 PyErr_SetString(PyExc_OverflowError,
1085 "repeated string is too long");
1086 return NULL;
1087 }
1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1089 Py_INCREF(a);
1090 return (PyObject *)a;
1091 }
1092 nbytes = (size_t)size;
1093 if (nbytes + PyStringObject_SIZE <= nbytes) {
1094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1096 return NULL;
1097 }
1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1099 if (op == NULL)
1100 return PyErr_NoMemory();
1101 PyObject_INIT_VAR(op, &PyString_Type, size);
1102 op->ob_shash = -1;
1103 op->ob_sstate = SSTATE_NOT_INTERNED;
1104 op->ob_sval[size] = '\0';
1105 if (Py_SIZE(a) == 1 && n > 0) {
1106 memset(op->ob_sval, a->ob_sval[0] , n);
1107 return (PyObject *) op;
1108 }
1109 i = 0;
1110 if (i < size) {
1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1112 i = Py_SIZE(a);
1113 }
1114 while (i < size) {
1115 j = (i <= size-i) ? i : size-i;
1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1117 i += j;
1118 }
1119 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001120}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001121
Christian Heimes44720832008-05-26 13:01:01 +00001122/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1123
1124static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001125string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001126 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001127 /* j -- may be negative! */
1128{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001129 if (i < 0)
1130 i = 0;
1131 if (j < 0)
1132 j = 0; /* Avoid signed/unsigned bug in next line */
1133 if (j > Py_SIZE(a))
1134 j = Py_SIZE(a);
1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1136 /* It's the same as a */
1137 Py_INCREF(a);
1138 return (PyObject *)a;
1139 }
1140 if (j < i)
1141 j = i;
1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001143}
1144
1145static int
1146string_contains(PyObject *str_obj, PyObject *sub_obj)
1147{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001148 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001149#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 if (PyUnicode_Check(sub_obj))
1151 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001152#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001153 if (!PyString_Check(sub_obj)) {
1154 PyErr_Format(PyExc_TypeError,
1155 "'in <string>' requires string as left operand, "
1156 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1157 return -1;
1158 }
1159 }
Christian Heimes44720832008-05-26 13:01:01 +00001160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001161 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001162}
1163
1164static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001165string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001167 char pchar;
1168 PyObject *v;
1169 if (i < 0 || i >= Py_SIZE(a)) {
1170 PyErr_SetString(PyExc_IndexError, "string index out of range");
1171 return NULL;
1172 }
1173 pchar = a->ob_sval[i];
1174 v = (PyObject *)characters[pchar & UCHAR_MAX];
1175 if (v == NULL)
1176 v = PyString_FromStringAndSize(&pchar, 1);
1177 else {
Christian Heimes44720832008-05-26 13:01:01 +00001178#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001179 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001180#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 Py_INCREF(v);
1182 }
1183 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001184}
1185
1186static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001187string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001188{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001189 int c;
1190 Py_ssize_t len_a, len_b;
1191 Py_ssize_t min_len;
1192 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001193
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001194 /* Make sure both arguments are strings. */
1195 if (!(PyString_Check(a) && PyString_Check(b))) {
1196 result = Py_NotImplemented;
1197 goto out;
1198 }
1199 if (a == b) {
1200 switch (op) {
1201 case Py_EQ:case Py_LE:case Py_GE:
1202 result = Py_True;
1203 goto out;
1204 case Py_NE:case Py_LT:case Py_GT:
1205 result = Py_False;
1206 goto out;
1207 }
1208 }
1209 if (op == Py_EQ) {
1210 /* Supporting Py_NE here as well does not save
1211 much time, since Py_NE is rarely used. */
1212 if (Py_SIZE(a) == Py_SIZE(b)
1213 && (a->ob_sval[0] == b->ob_sval[0]
1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1215 result = Py_True;
1216 } else {
1217 result = Py_False;
1218 }
1219 goto out;
1220 }
1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1222 min_len = (len_a < len_b) ? len_a : len_b;
1223 if (min_len > 0) {
1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1225 if (c==0)
1226 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1227 } else
1228 c = 0;
1229 if (c == 0)
1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1231 switch (op) {
1232 case Py_LT: c = c < 0; break;
1233 case Py_LE: c = c <= 0; break;
1234 case Py_EQ: assert(0); break; /* unreachable */
1235 case Py_NE: c = c != 0; break;
1236 case Py_GT: c = c > 0; break;
1237 case Py_GE: c = c >= 0; break;
1238 default:
1239 result = Py_NotImplemented;
1240 goto out;
1241 }
1242 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001243 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001244 Py_INCREF(result);
1245 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001246}
1247
1248int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001249_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001250{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001251 PyStringObject *a = (PyStringObject*) o1;
1252 PyStringObject *b = (PyStringObject*) o2;
1253 return Py_SIZE(a) == Py_SIZE(b)
1254 && *a->ob_sval == *b->ob_sval
1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001256}
1257
1258static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001259string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001260{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001261 register Py_ssize_t len;
1262 register unsigned char *p;
1263 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001264
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001265#ifdef Py_DEBUG
Benjamin Peterson26da9202012-02-21 11:08:50 -05001266 assert(_Py_HashSecret_Initialized);
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001267#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001268 if (a->ob_shash != -1)
1269 return a->ob_shash;
1270 len = Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001271 /*
1272 We make the hash of the empty string be 0, rather than using
1273 (prefix ^ suffix), since this slightly obfuscates the hash secret
1274 */
1275 if (len == 0) {
1276 a->ob_shash = 0;
1277 return 0;
1278 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001279 p = (unsigned char *) a->ob_sval;
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001280 x = _Py_HashSecret.prefix;
1281 x ^= *p << 7;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001282 while (--len >= 0)
1283 x = (1000003*x) ^ *p++;
1284 x ^= Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001285 x ^= _Py_HashSecret.suffix;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001286 if (x == -1)
1287 x = -2;
1288 a->ob_shash = x;
1289 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001290}
1291
1292static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001293string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001294{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001295 if (PyIndex_Check(item)) {
1296 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1297 if (i == -1 && PyErr_Occurred())
1298 return NULL;
1299 if (i < 0)
1300 i += PyString_GET_SIZE(self);
1301 return string_item(self, i);
1302 }
1303 else if (PySlice_Check(item)) {
1304 Py_ssize_t start, stop, step, slicelength, cur, i;
1305 char* source_buf;
1306 char* result_buf;
1307 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001308
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001309 if (PySlice_GetIndicesEx((PySliceObject*)item,
1310 PyString_GET_SIZE(self),
1311 &start, &stop, &step, &slicelength) < 0) {
1312 return NULL;
1313 }
Christian Heimes44720832008-05-26 13:01:01 +00001314
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001315 if (slicelength <= 0) {
1316 return PyString_FromStringAndSize("", 0);
1317 }
1318 else if (start == 0 && step == 1 &&
1319 slicelength == PyString_GET_SIZE(self) &&
1320 PyString_CheckExact(self)) {
1321 Py_INCREF(self);
1322 return (PyObject *)self;
1323 }
1324 else if (step == 1) {
1325 return PyString_FromStringAndSize(
1326 PyString_AS_STRING(self) + start,
1327 slicelength);
1328 }
1329 else {
1330 source_buf = PyString_AsString((PyObject*)self);
1331 result_buf = (char *)PyMem_Malloc(slicelength);
1332 if (result_buf == NULL)
1333 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001334
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001335 for (cur = start, i = 0; i < slicelength;
1336 cur += step, i++) {
1337 result_buf[i] = source_buf[cur];
1338 }
Christian Heimes44720832008-05-26 13:01:01 +00001339
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001340 result = PyString_FromStringAndSize(result_buf,
1341 slicelength);
1342 PyMem_Free(result_buf);
1343 return result;
1344 }
1345 }
1346 else {
1347 PyErr_Format(PyExc_TypeError,
1348 "string indices must be integers, not %.200s",
1349 Py_TYPE(item)->tp_name);
1350 return NULL;
1351 }
Christian Heimes44720832008-05-26 13:01:01 +00001352}
1353
1354static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001355string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001356{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001357 if ( index != 0 ) {
1358 PyErr_SetString(PyExc_SystemError,
1359 "accessing non-existent string segment");
1360 return -1;
1361 }
1362 *ptr = (void *)self->ob_sval;
1363 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001364}
1365
1366static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001367string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001368{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001369 PyErr_SetString(PyExc_TypeError,
1370 "Cannot use string as modifiable buffer");
1371 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001372}
1373
1374static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001375string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001376{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001377 if ( lenp )
1378 *lenp = Py_SIZE(self);
1379 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001380}
1381
1382static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001383string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001384{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001385 if ( index != 0 ) {
1386 PyErr_SetString(PyExc_SystemError,
1387 "accessing non-existent string segment");
1388 return -1;
1389 }
1390 *ptr = self->ob_sval;
1391 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001392}
1393
1394static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001395string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001396{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001397 return PyBuffer_FillInfo(view, (PyObject*)self,
1398 (void *)self->ob_sval, Py_SIZE(self),
1399 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001400}
1401
1402static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001403 (lenfunc)string_length, /*sq_length*/
1404 (binaryfunc)string_concat, /*sq_concat*/
1405 (ssizeargfunc)string_repeat, /*sq_repeat*/
1406 (ssizeargfunc)string_item, /*sq_item*/
1407 (ssizessizeargfunc)string_slice, /*sq_slice*/
1408 0, /*sq_ass_item*/
1409 0, /*sq_ass_slice*/
1410 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001411};
1412
1413static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001414 (lenfunc)string_length,
1415 (binaryfunc)string_subscript,
1416 0,
Christian Heimes44720832008-05-26 13:01:01 +00001417};
1418
1419static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001420 (readbufferproc)string_buffer_getreadbuf,
1421 (writebufferproc)string_buffer_getwritebuf,
1422 (segcountproc)string_buffer_getsegcount,
1423 (charbufferproc)string_buffer_getcharbuf,
1424 (getbufferproc)string_buffer_getbuffer,
1425 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001426};
1427
1428
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001429
/* Selectors for the three strip variants.  Each value doubles as an index
   into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above.  Every entry starts with the three characters
   "|O:" followed by the method name. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the stripformat entry with its three leading
   characters skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1438
Christian Heimes1a6387e2008-03-26 12:49:49 +00001439PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001440"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001441\n\
Christian Heimes44720832008-05-26 13:01:01 +00001442Return a list of the words in the string S, using sep as the\n\
1443delimiter string. If maxsplit is given, at most maxsplit\n\
1444splits are done. If sep is not specified or is None, any\n\
1445whitespace string is a separator and empty strings are removed\n\
1446from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001447
1448static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001449string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001450{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001451 Py_ssize_t len = PyString_GET_SIZE(self), n;
1452 Py_ssize_t maxsplit = -1;
1453 const char *s = PyString_AS_STRING(self), *sub;
1454 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001455
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001456 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1457 return NULL;
1458 if (maxsplit < 0)
1459 maxsplit = PY_SSIZE_T_MAX;
1460 if (subobj == Py_None)
1461 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1462 if (PyString_Check(subobj)) {
1463 sub = PyString_AS_STRING(subobj);
1464 n = PyString_GET_SIZE(subobj);
1465 }
Christian Heimes44720832008-05-26 13:01:01 +00001466#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001467 else if (PyUnicode_Check(subobj))
1468 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001469#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001470 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1471 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001472
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001473 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001474}
1475
1476PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001477"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001478\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001479Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001480the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001481found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001482
1483static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001484string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001485{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001486 const char *sep;
1487 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001488
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001489 if (PyString_Check(sep_obj)) {
1490 sep = PyString_AS_STRING(sep_obj);
1491 sep_len = PyString_GET_SIZE(sep_obj);
1492 }
Christian Heimes44720832008-05-26 13:01:01 +00001493#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001494 else if (PyUnicode_Check(sep_obj))
1495 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001496#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001497 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1498 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001499
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001500 return stringlib_partition(
1501 (PyObject*) self,
1502 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1503 sep_obj, sep, sep_len
1504 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001505}
1506
1507PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001508"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001509\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001510Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001511the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001512separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001513
1514static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001515string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001516{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001517 const char *sep;
1518 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001519
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001520 if (PyString_Check(sep_obj)) {
1521 sep = PyString_AS_STRING(sep_obj);
1522 sep_len = PyString_GET_SIZE(sep_obj);
1523 }
Christian Heimes44720832008-05-26 13:01:01 +00001524#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001525 else if (PyUnicode_Check(sep_obj))
1526 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001527#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001528 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1529 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001530
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001531 return stringlib_rpartition(
1532 (PyObject*) self,
1533 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1534 sep_obj, sep, sep_len
1535 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001536}
1537
Christian Heimes1a6387e2008-03-26 12:49:49 +00001538PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001539"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001540\n\
Christian Heimes44720832008-05-26 13:01:01 +00001541Return a list of the words in the string S, using sep as the\n\
1542delimiter string, starting at the end of the string and working\n\
1543to the front. If maxsplit is given, at most maxsplit splits are\n\
1544done. If sep is not specified or is None, any whitespace string\n\
1545is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001546
1547static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001548string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001549{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001550 Py_ssize_t len = PyString_GET_SIZE(self), n;
1551 Py_ssize_t maxsplit = -1;
1552 const char *s = PyString_AS_STRING(self), *sub;
1553 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001554
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001555 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1556 return NULL;
1557 if (maxsplit < 0)
1558 maxsplit = PY_SSIZE_T_MAX;
1559 if (subobj == Py_None)
1560 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1561 if (PyString_Check(subobj)) {
1562 sub = PyString_AS_STRING(subobj);
1563 n = PyString_GET_SIZE(subobj);
1564 }
Christian Heimes44720832008-05-26 13:01:01 +00001565#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001566 else if (PyUnicode_Check(subobj))
1567 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001568#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001569 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1570 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001571
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001572 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001573}
1574
1575
1576PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001577"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001578\n\
1579Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001580iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001581
1582static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001583string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001584{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001585 char *sep = PyString_AS_STRING(self);
1586 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1587 PyObject *res = NULL;
1588 char *p;
1589 Py_ssize_t seqlen = 0;
1590 size_t sz = 0;
1591 Py_ssize_t i;
1592 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001593
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001594 seq = PySequence_Fast(orig, "");
1595 if (seq == NULL) {
1596 return NULL;
1597 }
Christian Heimes44720832008-05-26 13:01:01 +00001598
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001599 seqlen = PySequence_Size(seq);
1600 if (seqlen == 0) {
1601 Py_DECREF(seq);
1602 return PyString_FromString("");
1603 }
1604 if (seqlen == 1) {
1605 item = PySequence_Fast_GET_ITEM(seq, 0);
1606 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1607 Py_INCREF(item);
1608 Py_DECREF(seq);
1609 return item;
1610 }
1611 }
Christian Heimes44720832008-05-26 13:01:01 +00001612
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001613 /* There are at least two things to join, or else we have a subclass
1614 * of the builtin types in the sequence.
1615 * Do a pre-pass to figure out the total amount of space we'll
1616 * need (sz), see whether any argument is absurd, and defer to
1617 * the Unicode join if appropriate.
1618 */
1619 for (i = 0; i < seqlen; i++) {
1620 const size_t old_sz = sz;
1621 item = PySequence_Fast_GET_ITEM(seq, i);
1622 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001623#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001624 if (PyUnicode_Check(item)) {
1625 /* Defer to Unicode join.
1626 * CAUTION: There's no gurantee that the
1627 * original sequence can be iterated over
1628 * again, so we must pass seq here.
1629 */
1630 PyObject *result;
1631 result = PyUnicode_Join((PyObject *)self, seq);
1632 Py_DECREF(seq);
1633 return result;
1634 }
Christian Heimes44720832008-05-26 13:01:01 +00001635#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001636 PyErr_Format(PyExc_TypeError,
1637 "sequence item %zd: expected string,"
1638 " %.80s found",
1639 i, Py_TYPE(item)->tp_name);
1640 Py_DECREF(seq);
1641 return NULL;
1642 }
1643 sz += PyString_GET_SIZE(item);
1644 if (i != 0)
1645 sz += seplen;
1646 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1647 PyErr_SetString(PyExc_OverflowError,
1648 "join() result is too long for a Python string");
1649 Py_DECREF(seq);
1650 return NULL;
1651 }
1652 }
Christian Heimes44720832008-05-26 13:01:01 +00001653
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001654 /* Allocate result space. */
1655 res = PyString_FromStringAndSize((char*)NULL, sz);
1656 if (res == NULL) {
1657 Py_DECREF(seq);
1658 return NULL;
1659 }
Christian Heimes44720832008-05-26 13:01:01 +00001660
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001661 /* Catenate everything. */
1662 p = PyString_AS_STRING(res);
1663 for (i = 0; i < seqlen; ++i) {
1664 size_t n;
1665 item = PySequence_Fast_GET_ITEM(seq, i);
1666 n = PyString_GET_SIZE(item);
1667 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1668 p += n;
1669 if (i < seqlen - 1) {
1670 Py_MEMCPY(p, sep, seplen);
1671 p += seplen;
1672 }
1673 }
Christian Heimes44720832008-05-26 13:01:01 +00001674
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001675 Py_DECREF(seq);
1676 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001677}
1678
1679PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001680_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001681{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001682 assert(sep != NULL && PyString_Check(sep));
1683 assert(x != NULL);
1684 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001685}
1686
/* Helper macro to fix up start/end slice values in place.
 *
 * `end` is clamped to `len` when too large; a negative `end` or `start`
 * has `len` added to it and is then clamped to 0 if still negative.
 * `start` is not clamped against `len` or `end`, so callers must cope
 * with start > end.
 *
 * NOTE(review): this expands to two bare `if` statements (not wrapped in
 * do { } while (0)) and evaluates `len` more than once, so use it only
 * as a full statement and pass a side-effect-free `len`.
 */
#define ADJUST_INDICES(start, end, len) \
    if (end > len) \
        end = len; \
    else if (end < 0) { \
        end += len; \
        if (end < 0) \
            end = 0; \
    } \
    if (start < 0) { \
        start += len; \
        if (start < 0) \
            start = 0; \
    }
Christian Heimes44720832008-05-26 13:01:01 +00001701
1702Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001703string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001704{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001705 PyObject *subobj;
1706 const char *sub;
1707 Py_ssize_t sub_len;
1708 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00001709
Jesus Cea44e81682011-04-20 16:39:15 +02001710 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1711 args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001712 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001713
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001714 if (PyString_Check(subobj)) {
1715 sub = PyString_AS_STRING(subobj);
1716 sub_len = PyString_GET_SIZE(subobj);
1717 }
Christian Heimes44720832008-05-26 13:01:01 +00001718#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001719 else if (PyUnicode_Check(subobj))
1720 return PyUnicode_Find(
1721 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001722#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001723 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1724 /* XXX - the "expected a character buffer object" is pretty
1725 confusing for a non-expert. remap to something else ? */
1726 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001727
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001728 if (dir > 0)
1729 return stringlib_find_slice(
1730 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1731 sub, sub_len, start, end);
1732 else
1733 return stringlib_rfind_slice(
1734 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001736}
1737
1738
1739PyDoc_STRVAR(find__doc__,
1740"S.find(sub [,start [,end]]) -> int\n\
1741\n\
1742Return the lowest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001743such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001744arguments start and end are interpreted as in slice notation.\n\
1745\n\
1746Return -1 on failure.");
1747
1748static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001749string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001750{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001751 Py_ssize_t result = string_find_internal(self, args, +1);
1752 if (result == -2)
1753 return NULL;
1754 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001755}
1756
1757
1758PyDoc_STRVAR(index__doc__,
1759"S.index(sub [,start [,end]]) -> int\n\
1760\n\
1761Like S.find() but raise ValueError when the substring is not found.");
1762
1763static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001764string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001765{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001766 Py_ssize_t result = string_find_internal(self, args, +1);
1767 if (result == -2)
1768 return NULL;
1769 if (result == -1) {
1770 PyErr_SetString(PyExc_ValueError,
1771 "substring not found");
1772 return NULL;
1773 }
1774 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001775}
1776
1777
1778PyDoc_STRVAR(rfind__doc__,
1779"S.rfind(sub [,start [,end]]) -> int\n\
1780\n\
1781Return the highest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001782such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001783arguments start and end are interpreted as in slice notation.\n\
1784\n\
1785Return -1 on failure.");
1786
1787static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001788string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001789{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001790 Py_ssize_t result = string_find_internal(self, args, -1);
1791 if (result == -2)
1792 return NULL;
1793 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001794}
1795
1796
1797PyDoc_STRVAR(rindex__doc__,
1798"S.rindex(sub [,start [,end]]) -> int\n\
1799\n\
1800Like S.rfind() but raise ValueError when the substring is not found.");
1801
1802static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001803string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001804{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001805 Py_ssize_t result = string_find_internal(self, args, -1);
1806 if (result == -2)
1807 return NULL;
1808 if (result == -1) {
1809 PyErr_SetString(PyExc_ValueError,
1810 "substring not found");
1811 return NULL;
1812 }
1813 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001814}
1815
1816
1817Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001818do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001819{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001820 char *s = PyString_AS_STRING(self);
1821 Py_ssize_t len = PyString_GET_SIZE(self);
1822 char *sep = PyString_AS_STRING(sepobj);
1823 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1824 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001825
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001826 i = 0;
1827 if (striptype != RIGHTSTRIP) {
1828 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1829 i++;
1830 }
1831 }
Christian Heimes44720832008-05-26 13:01:01 +00001832
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001833 j = len;
1834 if (striptype != LEFTSTRIP) {
1835 do {
1836 j--;
1837 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1838 j++;
1839 }
Christian Heimes44720832008-05-26 13:01:01 +00001840
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001841 if (i == 0 && j == len && PyString_CheckExact(self)) {
1842 Py_INCREF(self);
1843 return (PyObject*)self;
1844 }
1845 else
1846 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001847}
1848
1849
1850Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001851do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001852{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001853 char *s = PyString_AS_STRING(self);
1854 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001855
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001856 i = 0;
1857 if (striptype != RIGHTSTRIP) {
1858 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1859 i++;
1860 }
1861 }
Christian Heimes44720832008-05-26 13:01:01 +00001862
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001863 j = len;
1864 if (striptype != LEFTSTRIP) {
1865 do {
1866 j--;
1867 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1868 j++;
1869 }
Christian Heimes44720832008-05-26 13:01:01 +00001870
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001871 if (i == 0 && j == len && PyString_CheckExact(self)) {
1872 Py_INCREF(self);
1873 return (PyObject*)self;
1874 }
1875 else
1876 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001877}
1878
1879
1880Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001881do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001882{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001883 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001884
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001885 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1886 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001887
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001888 if (sep != NULL && sep != Py_None) {
1889 if (PyString_Check(sep))
1890 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001891#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001892 else if (PyUnicode_Check(sep)) {
1893 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1894 PyObject *res;
1895 if (uniself==NULL)
1896 return NULL;
1897 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1898 striptype, sep);
1899 Py_DECREF(uniself);
1900 return res;
1901 }
Christian Heimes44720832008-05-26 13:01:01 +00001902#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001903 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001904#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001905 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001906#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001907 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001908#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001909 STRIPNAME(striptype));
1910 return NULL;
1911 }
Christian Heimes44720832008-05-26 13:01:01 +00001912
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001913 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001914}
1915
1916
1917PyDoc_STRVAR(strip__doc__,
1918"S.strip([chars]) -> string or unicode\n\
1919\n\
1920Return a copy of the string S with leading and trailing\n\
1921whitespace removed.\n\
1922If chars is given and not None, remove characters in chars instead.\n\
1923If chars is unicode, S will be converted to unicode before stripping");
1924
1925static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001926string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001927{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001928 if (PyTuple_GET_SIZE(args) == 0)
1929 return do_strip(self, BOTHSTRIP); /* Common case */
1930 else
1931 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001932}
1933
1934
1935PyDoc_STRVAR(lstrip__doc__,
1936"S.lstrip([chars]) -> string or unicode\n\
1937\n\
1938Return a copy of the string S with leading whitespace removed.\n\
1939If chars is given and not None, remove characters in chars instead.\n\
1940If chars is unicode, S will be converted to unicode before stripping");
1941
1942static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001943string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001944{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001945 if (PyTuple_GET_SIZE(args) == 0)
1946 return do_strip(self, LEFTSTRIP); /* Common case */
1947 else
1948 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001949}
1950
1951
1952PyDoc_STRVAR(rstrip__doc__,
1953"S.rstrip([chars]) -> string or unicode\n\
1954\n\
1955Return a copy of the string S with trailing whitespace removed.\n\
1956If chars is given and not None, remove characters in chars instead.\n\
1957If chars is unicode, S will be converted to unicode before stripping");
1958
1959static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001960string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001961{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001962 if (PyTuple_GET_SIZE(args) == 0)
1963 return do_strip(self, RIGHTSTRIP); /* Common case */
1964 else
1965 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001966}
1967
1968
1969PyDoc_STRVAR(lower__doc__,
1970"S.lower() -> string\n\
1971\n\
1972Return a copy of the string S converted to lowercase.");
1973
1974/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1975#ifndef _tolower
1976#define _tolower tolower
1977#endif
1978
1979static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001980string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001981{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001982 char *s;
1983 Py_ssize_t i, n = PyString_GET_SIZE(self);
1984 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001985
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001986 newobj = PyString_FromStringAndSize(NULL, n);
1987 if (!newobj)
1988 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001989
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001990 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001991
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001992 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001993
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001994 for (i = 0; i < n; i++) {
1995 int c = Py_CHARMASK(s[i]);
1996 if (isupper(c))
1997 s[i] = _tolower(c);
1998 }
Christian Heimes44720832008-05-26 13:01:01 +00001999
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002000 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002001}
2002
2003PyDoc_STRVAR(upper__doc__,
2004"S.upper() -> string\n\
2005\n\
2006Return a copy of the string S converted to uppercase.");
2007
2008#ifndef _toupper
2009#define _toupper toupper
2010#endif
2011
2012static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002013string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002014{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002015 char *s;
2016 Py_ssize_t i, n = PyString_GET_SIZE(self);
2017 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002018
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002019 newobj = PyString_FromStringAndSize(NULL, n);
2020 if (!newobj)
2021 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002022
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002023 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002024
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002025 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002026
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002027 for (i = 0; i < n; i++) {
2028 int c = Py_CHARMASK(s[i]);
2029 if (islower(c))
2030 s[i] = _toupper(c);
2031 }
Christian Heimes44720832008-05-26 13:01:01 +00002032
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002033 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002034}
2035
2036PyDoc_STRVAR(title__doc__,
2037"S.title() -> string\n\
2038\n\
2039Return a titlecased version of S, i.e. words start with uppercase\n\
2040characters, all remaining cased characters have lowercase.");
2041
2042static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002043string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002044{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002045 char *s = PyString_AS_STRING(self), *s_new;
2046 Py_ssize_t i, n = PyString_GET_SIZE(self);
2047 int previous_is_cased = 0;
2048 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002049
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002050 newobj = PyString_FromStringAndSize(NULL, n);
2051 if (newobj == NULL)
2052 return NULL;
2053 s_new = PyString_AsString(newobj);
2054 for (i = 0; i < n; i++) {
2055 int c = Py_CHARMASK(*s++);
2056 if (islower(c)) {
2057 if (!previous_is_cased)
2058 c = toupper(c);
2059 previous_is_cased = 1;
2060 } else if (isupper(c)) {
2061 if (previous_is_cased)
2062 c = tolower(c);
2063 previous_is_cased = 1;
2064 } else
2065 previous_is_cased = 0;
2066 *s_new++ = c;
2067 }
2068 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002069}
2070
2071PyDoc_STRVAR(capitalize__doc__,
2072"S.capitalize() -> string\n\
2073\n\
2074Return a copy of the string S with only its first character\n\
2075capitalized.");
2076
2077static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002078string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002079{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002080 char *s = PyString_AS_STRING(self), *s_new;
2081 Py_ssize_t i, n = PyString_GET_SIZE(self);
2082 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002083
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002084 newobj = PyString_FromStringAndSize(NULL, n);
2085 if (newobj == NULL)
2086 return NULL;
2087 s_new = PyString_AsString(newobj);
2088 if (0 < n) {
2089 int c = Py_CHARMASK(*s++);
2090 if (islower(c))
2091 *s_new = toupper(c);
2092 else
2093 *s_new = c;
2094 s_new++;
2095 }
2096 for (i = 1; i < n; i++) {
2097 int c = Py_CHARMASK(*s++);
2098 if (isupper(c))
2099 *s_new = tolower(c);
2100 else
2101 *s_new = c;
2102 s_new++;
2103 }
2104 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002105}
2106
2107
2108PyDoc_STRVAR(count__doc__,
2109"S.count(sub[, start[, end]]) -> int\n\
2110\n\
2111Return the number of non-overlapping occurrences of substring sub in\n\
2112string S[start:end]. Optional arguments start and end are interpreted\n\
2113as in slice notation.");
2114
2115static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002116string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002117{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002118 PyObject *sub_obj;
2119 const char *str = PyString_AS_STRING(self), *sub;
2120 Py_ssize_t sub_len;
2121 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002122
Jesus Cea44e81682011-04-20 16:39:15 +02002123 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002124 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002125
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002126 if (PyString_Check(sub_obj)) {
2127 sub = PyString_AS_STRING(sub_obj);
2128 sub_len = PyString_GET_SIZE(sub_obj);
2129 }
Christian Heimes44720832008-05-26 13:01:01 +00002130#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002131 else if (PyUnicode_Check(sub_obj)) {
2132 Py_ssize_t count;
2133 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2134 if (count == -1)
2135 return NULL;
2136 else
2137 return PyInt_FromSsize_t(count);
2138 }
Christian Heimes44720832008-05-26 13:01:01 +00002139#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002140 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2141 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002142
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002143 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002144
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002145 return PyInt_FromSsize_t(
2146 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2147 );
Christian Heimes44720832008-05-26 13:01:01 +00002148}
2149
2150PyDoc_STRVAR(swapcase__doc__,
2151"S.swapcase() -> string\n\
2152\n\
2153Return a copy of the string S with uppercase characters\n\
2154converted to lowercase and vice versa.");
2155
2156static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002157string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002158{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002159 char *s = PyString_AS_STRING(self), *s_new;
2160 Py_ssize_t i, n = PyString_GET_SIZE(self);
2161 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002162
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002163 newobj = PyString_FromStringAndSize(NULL, n);
2164 if (newobj == NULL)
2165 return NULL;
2166 s_new = PyString_AsString(newobj);
2167 for (i = 0; i < n; i++) {
2168 int c = Py_CHARMASK(*s++);
2169 if (islower(c)) {
2170 *s_new = toupper(c);
2171 }
2172 else if (isupper(c)) {
2173 *s_new = tolower(c);
2174 }
2175 else
2176 *s_new = c;
2177 s_new++;
2178 }
2179 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002180}
2181
2182
PyDoc_STRVAR(translate__doc__,
"S.translate(table [,deletechars]) -> string\n\
\n\
Return a copy of the string S, where all characters occurring\n\
in the optional argument deletechars are removed, and the\n\
remaining characters have been mapped through the given\n\
translation table, which must be a string of length 256 or None.\n\
If the table argument is None, no translation is applied and\n\
the operation simply removes the characters in deletechars.");

/* Implementation of str.translate().
 *
 * args holds the translation table and an optional deletechars object.
 * The table may be a str, None (identity mapping), a unicode object
 * (the whole call is forwarded to PyUnicode_Translate(), and supplying
 * deletechars is then a TypeError), or any object exposing a character
 * buffer.  A non-unicode table must be exactly 256 bytes long.
 *
 * Returns a new reference, or NULL with an exception set.  When nothing
 * changed and self is an exact str, self itself is returned.
 */
static PyObject *
string_translate(PyStringObject *self, PyObject *args)
{
    register char *input, *output;
    const char *table;
    register Py_ssize_t i, c, changed = 0;
    PyObject *input_obj = (PyObject*)self;
    const char *output_start, *del_table=NULL;
    Py_ssize_t inlen, tablen, dellen = 0;
    PyObject *result;
    /* Per-byte mapping used by the slow path; -1 marks "delete". */
    int trans_table[256];
    PyObject *tableobj, *delobj = NULL;

    if (!PyArg_UnpackTuple(args, "translate", 1, 2,
                          &tableobj, &delobj))
        return NULL;

    /* Resolve the table argument to a (pointer, length) pair. */
    if (PyString_Check(tableobj)) {
        table = PyString_AS_STRING(tableobj);
        tablen = PyString_GET_SIZE(tableobj);
    }
    else if (tableobj == Py_None) {
        /* NULL table means identity; 256 satisfies the length check. */
        table = NULL;
        tablen = 256;
    }
#ifdef Py_USING_UNICODE
    else if (PyUnicode_Check(tableobj)) {
        /* Unicode .translate() does not support the deletechars
           parameter; instead a mapping to None will cause characters
           to be deleted. */
        if (delobj != NULL) {
            PyErr_SetString(PyExc_TypeError,
            "deletions are implemented differently for unicode");
            return NULL;
        }
        return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
    }
#endif
    else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
        return NULL;

    if (tablen != 256) {
        PyErr_SetString(PyExc_ValueError,
          "translation table must be 256 characters long");
        return NULL;
    }

    /* Resolve the optional deletechars argument the same way. */
    if (delobj != NULL) {
        if (PyString_Check(delobj)) {
            del_table = PyString_AS_STRING(delobj);
            dellen = PyString_GET_SIZE(delobj);
        }
#ifdef Py_USING_UNICODE
        else if (PyUnicode_Check(delobj)) {
            PyErr_SetString(PyExc_TypeError,
            "deletions are implemented differently for unicode");
            return NULL;
        }
#endif
        else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
            return NULL;
    }
    else {
        del_table = NULL;
        dellen = 0;
    }

    /* The result can never be longer than the input; it is allocated at
       full size and shrunk at the end if bytes were deleted. */
    inlen = PyString_GET_SIZE(input_obj);
    result = PyString_FromStringAndSize((char *)NULL, inlen);
    if (result == NULL)
        return NULL;
    output_start = output = PyString_AsString(result);
    input = PyString_AS_STRING(input_obj);

    if (dellen == 0 && table != NULL) {
        /* If no deletions are required, use faster code */
        for (i = inlen; --i >= 0; ) {
            c = Py_CHARMASK(*input++);
            if (Py_CHARMASK((*output++ = table[c])) != c)
                changed = 1;
        }
        /* Subclass instances always get a fresh exact str back. */
        if (changed || !PyString_CheckExact(input_obj))
            return result;
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }

    /* Slow path: build an int table so that -1 can flag deleted bytes. */
    if (table == NULL) {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(i);
    } else {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(table[i]);
    }

    for (i = 0; i < dellen; i++)
        trans_table[(int) Py_CHARMASK(del_table[i])] = -1;

    for (i = inlen; --i >= 0; ) {
        c = Py_CHARMASK(*input++);
        if (trans_table[c] != -1)
            /* Kept byte: write it, and skip the flag if it is unchanged. */
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
                continue;
        /* Reached for a deleted byte or one mapped to a different value. */
        changed = 1;
    }
    if (!changed && PyString_CheckExact(input_obj)) {
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }
    /* Fix the size of the resulting string */
    if (inlen > 0 && _PyString_Resize(&result, output - output_start))
        return NULL;
    return result;
}
2309
2310
/* find and count characters and substrings */

/* findchar(target, target_len, c): thin wrapper around memchr() that
 * yields a `char *` to the first occurrence of byte `c` within the
 * first `target_len` bytes of `target`, or NULL when there is none.
 */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), c, target_len))
2315
2316/* String ops must return a string. */
2317/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002318Py_LOCAL(PyStringObject *)
2319return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002320{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002321 if (PyString_CheckExact(self)) {
2322 Py_INCREF(self);
2323 return self;
2324 }
2325 return (PyStringObject *)PyString_FromStringAndSize(
2326 PyString_AS_STRING(self),
2327 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002328}
2329
2330Py_LOCAL_INLINE(Py_ssize_t)
2331countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2332{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002333 Py_ssize_t count=0;
2334 const char *start=target;
2335 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002336
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002337 while ( (start=findchar(start, end-start, c)) != NULL ) {
2338 count++;
2339 if (count >= maxcount)
2340 break;
2341 start += 1;
2342 }
2343 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002344}
2345
Christian Heimes44720832008-05-26 13:01:01 +00002346
2347/* Algorithms for different cases of string replacement */
2348
2349/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002350Py_LOCAL(PyStringObject *)
2351replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002352 const char *to_s, Py_ssize_t to_len,
2353 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002354{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002355 char *self_s, *result_s;
2356 Py_ssize_t self_len, result_len;
2357 Py_ssize_t count, i, product;
2358 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002359
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002360 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002361
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002362 /* 1 at the end plus 1 after every character */
2363 count = self_len+1;
2364 if (maxcount < count)
2365 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002366
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002367 /* Check for overflow */
2368 /* result_len = count * to_len + self_len; */
2369 product = count * to_len;
2370 if (product / to_len != count) {
2371 PyErr_SetString(PyExc_OverflowError,
2372 "replace string is too long");
2373 return NULL;
2374 }
2375 result_len = product + self_len;
2376 if (result_len < 0) {
2377 PyErr_SetString(PyExc_OverflowError,
2378 "replace string is too long");
2379 return NULL;
2380 }
Christian Heimes44720832008-05-26 13:01:01 +00002381
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002382 if (! (result = (PyStringObject *)
2383 PyString_FromStringAndSize(NULL, result_len)) )
2384 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002385
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002386 self_s = PyString_AS_STRING(self);
2387 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002388
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002389 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002390
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002391 /* Lay the first one down (guaranteed this will occur) */
2392 Py_MEMCPY(result_s, to_s, to_len);
2393 result_s += to_len;
2394 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002395
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002396 for (i=0; i<count; i++) {
2397 *result_s++ = *self_s++;
2398 Py_MEMCPY(result_s, to_s, to_len);
2399 result_s += to_len;
2400 }
2401
2402 /* Copy the rest of the original string */
2403 Py_MEMCPY(result_s, self_s, self_len-i);
2404
2405 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002406}
2407
2408/* Special case for deleting a single character */
2409/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002410Py_LOCAL(PyStringObject *)
2411replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002412 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002413{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002414 char *self_s, *result_s;
2415 char *start, *next, *end;
2416 Py_ssize_t self_len, result_len;
2417 Py_ssize_t count;
2418 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002419
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002420 self_len = PyString_GET_SIZE(self);
2421 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002422
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002423 count = countchar(self_s, self_len, from_c, maxcount);
2424 if (count == 0) {
2425 return return_self(self);
2426 }
Christian Heimes44720832008-05-26 13:01:01 +00002427
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002428 result_len = self_len - count; /* from_len == 1 */
2429 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002430
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002431 if ( (result = (PyStringObject *)
2432 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2433 return NULL;
2434 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002435
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002436 start = self_s;
2437 end = self_s + self_len;
2438 while (count-- > 0) {
2439 next = findchar(start, end-start, from_c);
2440 if (next == NULL)
2441 break;
2442 Py_MEMCPY(result_s, start, next-start);
2443 result_s += (next-start);
2444 start = next+1;
2445 }
2446 Py_MEMCPY(result_s, start, end-start);
2447
2448 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002449}
2450
2451/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2452
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002453Py_LOCAL(PyStringObject *)
2454replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002455 const char *from_s, Py_ssize_t from_len,
2456 Py_ssize_t maxcount) {
2457 char *self_s, *result_s;
2458 char *start, *next, *end;
2459 Py_ssize_t self_len, result_len;
2460 Py_ssize_t count, offset;
2461 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002462
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002463 self_len = PyString_GET_SIZE(self);
2464 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002465
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002466 count = stringlib_count(self_s, self_len,
2467 from_s, from_len,
2468 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002469
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002470 if (count == 0) {
2471 /* no matches */
2472 return return_self(self);
2473 }
Christian Heimes44720832008-05-26 13:01:01 +00002474
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002475 result_len = self_len - (count * from_len);
2476 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002477
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002478 if ( (result = (PyStringObject *)
2479 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2480 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002481
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002482 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002483
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002484 start = self_s;
2485 end = self_s + self_len;
2486 while (count-- > 0) {
2487 offset = stringlib_find(start, end-start,
2488 from_s, from_len,
2489 0);
2490 if (offset == -1)
2491 break;
2492 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002493
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002494 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002495
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002496 result_s += (next-start);
2497 start = next+from_len;
2498 }
2499 Py_MEMCPY(result_s, start, end-start);
2500 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002501}
2502
2503/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002504Py_LOCAL(PyStringObject *)
2505replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002506 char from_c, char to_c,
2507 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002508{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002509 char *self_s, *result_s, *start, *end, *next;
2510 Py_ssize_t self_len;
2511 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002512
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002513 /* The result string will be the same size */
2514 self_s = PyString_AS_STRING(self);
2515 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002516
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002517 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002518
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002519 if (next == NULL) {
2520 /* No matches; return the original string */
2521 return return_self(self);
2522 }
Christian Heimes44720832008-05-26 13:01:01 +00002523
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002524 /* Need to make a new string */
2525 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2526 if (result == NULL)
2527 return NULL;
2528 result_s = PyString_AS_STRING(result);
2529 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002530
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002531 /* change everything in-place, starting with this one */
2532 start = result_s + (next-self_s);
2533 *start = to_c;
2534 start++;
2535 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002536
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002537 while (--maxcount > 0) {
2538 next = findchar(start, end-start, from_c);
2539 if (next == NULL)
2540 break;
2541 *next = to_c;
2542 start = next+1;
2543 }
Christian Heimes44720832008-05-26 13:01:01 +00002544
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002545 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002546}
2547
2548/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002549Py_LOCAL(PyStringObject *)
2550replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002551 const char *from_s, Py_ssize_t from_len,
2552 const char *to_s, Py_ssize_t to_len,
2553 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002554{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002555 char *result_s, *start, *end;
2556 char *self_s;
2557 Py_ssize_t self_len, offset;
2558 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002559
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002560 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002561
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002562 self_s = PyString_AS_STRING(self);
2563 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002564
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002565 offset = stringlib_find(self_s, self_len,
2566 from_s, from_len,
2567 0);
2568 if (offset == -1) {
2569 /* No matches; return the original string */
2570 return return_self(self);
2571 }
Christian Heimes44720832008-05-26 13:01:01 +00002572
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002573 /* Need to make a new string */
2574 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2575 if (result == NULL)
2576 return NULL;
2577 result_s = PyString_AS_STRING(result);
2578 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002579
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002580 /* change everything in-place, starting with this one */
2581 start = result_s + offset;
2582 Py_MEMCPY(start, to_s, from_len);
2583 start += from_len;
2584 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002585
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002586 while ( --maxcount > 0) {
2587 offset = stringlib_find(start, end-start,
2588 from_s, from_len,
2589 0);
2590 if (offset==-1)
2591 break;
2592 Py_MEMCPY(start+offset, to_s, from_len);
2593 start += offset+from_len;
2594 }
Christian Heimes44720832008-05-26 13:01:01 +00002595
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002596 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002597}
2598
2599/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002600Py_LOCAL(PyStringObject *)
2601replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002602 char from_c,
2603 const char *to_s, Py_ssize_t to_len,
2604 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002605{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002606 char *self_s, *result_s;
2607 char *start, *next, *end;
2608 Py_ssize_t self_len, result_len;
2609 Py_ssize_t count, product;
2610 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002611
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002612 self_s = PyString_AS_STRING(self);
2613 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002614
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002615 count = countchar(self_s, self_len, from_c, maxcount);
2616 if (count == 0) {
2617 /* no matches, return unchanged */
2618 return return_self(self);
2619 }
Christian Heimes44720832008-05-26 13:01:01 +00002620
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002621 /* use the difference between current and new, hence the "-1" */
2622 /* result_len = self_len + count * (to_len-1) */
2623 product = count * (to_len-1);
2624 if (product / (to_len-1) != count) {
2625 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2626 return NULL;
2627 }
2628 result_len = self_len + product;
2629 if (result_len < 0) {
2630 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2631 return NULL;
2632 }
Christian Heimes44720832008-05-26 13:01:01 +00002633
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002634 if ( (result = (PyStringObject *)
2635 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2636 return NULL;
2637 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002638
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002639 start = self_s;
2640 end = self_s + self_len;
2641 while (count-- > 0) {
2642 next = findchar(start, end-start, from_c);
2643 if (next == NULL)
2644 break;
Christian Heimes44720832008-05-26 13:01:01 +00002645
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002646 if (next == start) {
2647 /* replace with the 'to' */
2648 Py_MEMCPY(result_s, to_s, to_len);
2649 result_s += to_len;
2650 start += 1;
2651 } else {
2652 /* copy the unchanged old then the 'to' */
2653 Py_MEMCPY(result_s, start, next-start);
2654 result_s += (next-start);
2655 Py_MEMCPY(result_s, to_s, to_len);
2656 result_s += to_len;
2657 start = next+1;
2658 }
2659 }
2660 /* Copy the remainder of the remaining string */
2661 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002662
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002663 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002664}
2665
2666/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002667Py_LOCAL(PyStringObject *)
2668replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002669 const char *from_s, Py_ssize_t from_len,
2670 const char *to_s, Py_ssize_t to_len,
2671 Py_ssize_t maxcount) {
2672 char *self_s, *result_s;
2673 char *start, *next, *end;
2674 Py_ssize_t self_len, result_len;
2675 Py_ssize_t count, offset, product;
2676 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002677
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002678 self_s = PyString_AS_STRING(self);
2679 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002680
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002681 count = stringlib_count(self_s, self_len,
2682 from_s, from_len,
2683 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002684
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002685 if (count == 0) {
2686 /* no matches, return unchanged */
2687 return return_self(self);
2688 }
Christian Heimes44720832008-05-26 13:01:01 +00002689
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002690 /* Check for overflow */
2691 /* result_len = self_len + count * (to_len-from_len) */
2692 product = count * (to_len-from_len);
2693 if (product / (to_len-from_len) != count) {
2694 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2695 return NULL;
2696 }
2697 result_len = self_len + product;
2698 if (result_len < 0) {
2699 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700 return NULL;
2701 }
Christian Heimes44720832008-05-26 13:01:01 +00002702
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002703 if ( (result = (PyStringObject *)
2704 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2705 return NULL;
2706 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002707
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002708 start = self_s;
2709 end = self_s + self_len;
2710 while (count-- > 0) {
2711 offset = stringlib_find(start, end-start,
2712 from_s, from_len,
2713 0);
2714 if (offset == -1)
2715 break;
2716 next = start+offset;
2717 if (next == start) {
2718 /* replace with the 'to' */
2719 Py_MEMCPY(result_s, to_s, to_len);
2720 result_s += to_len;
2721 start += from_len;
2722 } else {
2723 /* copy the unchanged old then the 'to' */
2724 Py_MEMCPY(result_s, start, next-start);
2725 result_s += (next-start);
2726 Py_MEMCPY(result_s, to_s, to_len);
2727 result_s += to_len;
2728 start = next+from_len;
2729 }
2730 }
2731 /* Copy the remainder of the remaining string */
2732 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002733
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002734 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002735}
2736
2737
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002738Py_LOCAL(PyStringObject *)
2739replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002740 const char *from_s, Py_ssize_t from_len,
2741 const char *to_s, Py_ssize_t to_len,
2742 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002743{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002744 if (maxcount < 0) {
2745 maxcount = PY_SSIZE_T_MAX;
2746 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2747 /* nothing to do; return the original string */
2748 return return_self(self);
2749 }
Christian Heimes44720832008-05-26 13:01:01 +00002750
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002751 if (maxcount == 0 ||
2752 (from_len == 0 && to_len == 0)) {
2753 /* nothing to do; return the original string */
2754 return return_self(self);
2755 }
Christian Heimes44720832008-05-26 13:01:01 +00002756
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002757 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002758
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002759 if (from_len == 0) {
2760 /* insert the 'to' string everywhere. */
2761 /* >>> "Python".replace("", ".") */
2762 /* '.P.y.t.h.o.n.' */
2763 return replace_interleave(self, to_s, to_len, maxcount);
2764 }
Christian Heimes44720832008-05-26 13:01:01 +00002765
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002766 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767 /* point for an empty self string to generate a non-empty string */
2768 /* Special case so the remaining code always gets a non-empty string */
2769 if (PyString_GET_SIZE(self) == 0) {
2770 return return_self(self);
2771 }
Christian Heimes44720832008-05-26 13:01:01 +00002772
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002773 if (to_len == 0) {
2774 /* delete all occurances of 'from' string */
2775 if (from_len == 1) {
2776 return replace_delete_single_character(
2777 self, from_s[0], maxcount);
2778 } else {
2779 return replace_delete_substring(self, from_s, from_len, maxcount);
2780 }
2781 }
Christian Heimes44720832008-05-26 13:01:01 +00002782
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002783 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002784
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002785 if (from_len == to_len) {
2786 if (from_len == 1) {
2787 return replace_single_character_in_place(
2788 self,
2789 from_s[0],
2790 to_s[0],
2791 maxcount);
2792 } else {
2793 return replace_substring_in_place(
2794 self, from_s, from_len, to_s, to_len, maxcount);
2795 }
2796 }
Christian Heimes44720832008-05-26 13:01:01 +00002797
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002798 /* Otherwise use the more generic algorithms */
2799 if (from_len == 1) {
2800 return replace_single_character(self, from_s[0],
2801 to_s, to_len, maxcount);
2802 } else {
2803 /* len('from')>=2, len('to')>=1 */
2804 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2805 }
Christian Heimes44720832008-05-26 13:01:01 +00002806}
2807
2808PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002809"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002810\n\
2811Return a copy of string S with all occurrences of substring\n\
2812old replaced by new. If the optional argument count is\n\
2813given, only the first count occurrences are replaced.");
2814
2815static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002816string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002817{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002818 Py_ssize_t count = -1;
2819 PyObject *from, *to;
2820 const char *from_s, *to_s;
2821 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002822
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002823 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2824 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002825
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002826 if (PyString_Check(from)) {
2827 from_s = PyString_AS_STRING(from);
2828 from_len = PyString_GET_SIZE(from);
2829 }
Christian Heimes44720832008-05-26 13:01:01 +00002830#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002831 if (PyUnicode_Check(from))
2832 return PyUnicode_Replace((PyObject *)self,
2833 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002834#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002835 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2836 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002837
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002838 if (PyString_Check(to)) {
2839 to_s = PyString_AS_STRING(to);
2840 to_len = PyString_GET_SIZE(to);
2841 }
Christian Heimes44720832008-05-26 13:01:01 +00002842#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002843 else if (PyUnicode_Check(to))
2844 return PyUnicode_Replace((PyObject *)self,
2845 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002846#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002847 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2848 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002849
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002850 return (PyObject *)replace((PyStringObject *) self,
2851 from_s, from_len,
2852 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002853}
2854
2855/** End DALKE **/
2856
2857/* Matches the end (direction >= 0) or start (direction < 0) of self
2858 * against substr, using the start and end arguments. Returns
2859 * -1 on error, 0 if not found and 1 if found.
2860 */
2861Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002862_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002863 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002864{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002865 Py_ssize_t len = PyString_GET_SIZE(self);
2866 Py_ssize_t slen;
2867 const char* sub;
2868 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002869
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002870 if (PyString_Check(substr)) {
2871 sub = PyString_AS_STRING(substr);
2872 slen = PyString_GET_SIZE(substr);
2873 }
Christian Heimes44720832008-05-26 13:01:01 +00002874#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002875 else if (PyUnicode_Check(substr))
2876 return PyUnicode_Tailmatch((PyObject *)self,
2877 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002878#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002879 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2880 return -1;
2881 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002882
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002883 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002884
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002885 if (direction < 0) {
2886 /* startswith */
2887 if (start+slen > len)
2888 return 0;
2889 } else {
2890 /* endswith */
2891 if (end-start < slen || start > len)
2892 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002893
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002894 if (end-slen > start)
2895 start = end - slen;
2896 }
2897 if (end-start >= slen)
2898 return ! memcmp(str+start, sub, slen);
2899 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002900}
2901
2902
2903PyDoc_STRVAR(startswith__doc__,
2904"S.startswith(prefix[, start[, end]]) -> bool\n\
2905\n\
2906Return True if S starts with the specified prefix, False otherwise.\n\
2907With optional start, test S beginning at that position.\n\
2908With optional end, stop comparing S at that position.\n\
2909prefix can also be a tuple of strings to try.");
2910
2911static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002912string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002913{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002914 Py_ssize_t start = 0;
2915 Py_ssize_t end = PY_SSIZE_T_MAX;
2916 PyObject *subobj;
2917 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002918
Jesus Cea44e81682011-04-20 16:39:15 +02002919 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002920 return NULL;
2921 if (PyTuple_Check(subobj)) {
2922 Py_ssize_t i;
2923 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2924 result = _string_tailmatch(self,
2925 PyTuple_GET_ITEM(subobj, i),
2926 start, end, -1);
2927 if (result == -1)
2928 return NULL;
2929 else if (result) {
2930 Py_RETURN_TRUE;
2931 }
2932 }
2933 Py_RETURN_FALSE;
2934 }
2935 result = _string_tailmatch(self, subobj, start, end, -1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002936 if (result == -1) {
2937 if (PyErr_ExceptionMatches(PyExc_TypeError))
2938 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2939 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002940 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002941 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002942 else
2943 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002944}
2945
2946
2947PyDoc_STRVAR(endswith__doc__,
2948"S.endswith(suffix[, start[, end]]) -> bool\n\
2949\n\
2950Return True if S ends with the specified suffix, False otherwise.\n\
2951With optional start, test S beginning at that position.\n\
2952With optional end, stop comparing S at that position.\n\
2953suffix can also be a tuple of strings to try.");
2954
2955static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002956string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002957{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002958 Py_ssize_t start = 0;
2959 Py_ssize_t end = PY_SSIZE_T_MAX;
2960 PyObject *subobj;
2961 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002962
Jesus Cea44e81682011-04-20 16:39:15 +02002963 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002964 return NULL;
2965 if (PyTuple_Check(subobj)) {
2966 Py_ssize_t i;
2967 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2968 result = _string_tailmatch(self,
2969 PyTuple_GET_ITEM(subobj, i),
2970 start, end, +1);
2971 if (result == -1)
2972 return NULL;
2973 else if (result) {
2974 Py_RETURN_TRUE;
2975 }
2976 }
2977 Py_RETURN_FALSE;
2978 }
2979 result = _string_tailmatch(self, subobj, start, end, +1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002980 if (result == -1) {
2981 if (PyErr_ExceptionMatches(PyExc_TypeError))
2982 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2983 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002984 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002985 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002986 else
2987 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002988}
2989
2990
2991PyDoc_STRVAR(encode__doc__,
2992"S.encode([encoding[,errors]]) -> object\n\
2993\n\
2994Encodes S using the codec registered for encoding. encoding defaults\n\
2995to the default encoding. errors may be given to set a different error\n\
2996handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2997a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2998'xmlcharrefreplace' as well as any other name registered with\n\
2999codecs.register_error that is able to handle UnicodeEncodeErrors.");
3000
3001static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003002string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003003{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003004 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003005 char *encoding = NULL;
3006 char *errors = NULL;
3007 PyObject *v;
3008
Benjamin Peterson332d7212009-09-18 21:14:55 +00003009 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003010 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003011 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003012 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003013 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003014 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003015 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003016 PyErr_Format(PyExc_TypeError,
3017 "encoder did not return a string/unicode object "
3018 "(type=%.400s)",
3019 Py_TYPE(v)->tp_name);
3020 Py_DECREF(v);
3021 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003022 }
3023 return v;
3024
3025 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003026 return NULL;
3027}
3028
Christian Heimes44720832008-05-26 13:01:01 +00003029
3030PyDoc_STRVAR(decode__doc__,
3031"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003032\n\
Christian Heimes44720832008-05-26 13:01:01 +00003033Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003034to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003035handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3036a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003037as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003038able to handle UnicodeDecodeErrors.");
3039
3040static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003041string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003042{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003043 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003044 char *encoding = NULL;
3045 char *errors = NULL;
3046 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003047
Benjamin Peterson332d7212009-09-18 21:14:55 +00003048 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003049 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003050 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003051 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003052 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003053 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003054 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003055 PyErr_Format(PyExc_TypeError,
3056 "decoder did not return a string/unicode object "
3057 "(type=%.400s)",
3058 Py_TYPE(v)->tp_name);
3059 Py_DECREF(v);
3060 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003061 }
3062 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003063
Christian Heimes44720832008-05-26 13:01:01 +00003064 onError:
3065 return NULL;
3066}
3067
3068
3069PyDoc_STRVAR(expandtabs__doc__,
3070"S.expandtabs([tabsize]) -> string\n\
3071\n\
3072Return a copy of S where all tab characters are expanded using spaces.\n\
3073If tabsize is not given, a tab size of 8 characters is assumed.");
3074
3075static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003076string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003077{
3078 const char *e, *p, *qe;
3079 char *q;
3080 Py_ssize_t i, j, incr;
3081 PyObject *u;
3082 int tabsize = 8;
3083
3084 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003085 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003086
3087 /* First pass: determine size of output string */
3088 i = 0; /* chars up to and including most recent \n or \r */
3089 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003090 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3091 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003092 if (*p == '\t') {
3093 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003094 incr = tabsize - (j % tabsize);
3095 if (j > PY_SSIZE_T_MAX - incr)
3096 goto overflow1;
3097 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003098 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003099 }
3100 else {
3101 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003102 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003103 j++;
3104 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003105 if (i > PY_SSIZE_T_MAX - j)
3106 goto overflow1;
3107 i += j;
3108 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003109 }
3110 }
Christian Heimes44720832008-05-26 13:01:01 +00003111
3112 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003113 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003114
3115 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003116 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003117 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003118 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003119
3120 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003121 q = PyString_AS_STRING(u); /* next output char */
3122 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003123
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003124 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003125 if (*p == '\t') {
3126 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003127 i = tabsize - (j % tabsize);
3128 j += i;
3129 while (i--) {
3130 if (q >= qe)
3131 goto overflow2;
3132 *q++ = ' ';
3133 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003134 }
3135 }
3136 else {
3137 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003138 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003139 *q++ = *p;
3140 j++;
3141 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003142 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003143 }
Christian Heimes44720832008-05-26 13:01:01 +00003144
3145 return u;
3146
3147 overflow2:
3148 Py_DECREF(u);
3149 overflow1:
3150 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3151 return NULL;
3152}
3153
3154Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003155pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003156{
3157 PyObject *u;
3158
3159 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003160 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003161 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003162 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003163
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003164 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003165 Py_INCREF(self);
3166 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003167 }
3168
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003169 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003170 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003171 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003172 if (left)
3173 memset(PyString_AS_STRING(u), fill, left);
3174 Py_MEMCPY(PyString_AS_STRING(u) + left,
3175 PyString_AS_STRING(self),
3176 PyString_GET_SIZE(self));
3177 if (right)
3178 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3179 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003180 }
3181
3182 return u;
3183}
3184
3185PyDoc_STRVAR(ljust__doc__,
3186"S.ljust(width[, fillchar]) -> string\n"
3187"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003188"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003189"done using the specified fill character (default is a space).");
3190
3191static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003192string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003193{
3194 Py_ssize_t width;
3195 char fillchar = ' ';
3196
3197 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003198 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003199
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003200 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003201 Py_INCREF(self);
3202 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003203 }
3204
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003205 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003206}
3207
3208
3209PyDoc_STRVAR(rjust__doc__,
3210"S.rjust(width[, fillchar]) -> string\n"
3211"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003212"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003213"done using the specified fill character (default is a space)");
3214
3215static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003216string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003217{
3218 Py_ssize_t width;
3219 char fillchar = ' ';
3220
3221 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003222 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003223
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003224 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003225 Py_INCREF(self);
3226 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003227 }
3228
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003229 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003230}
3231
3232
3233PyDoc_STRVAR(center__doc__,
3234"S.center(width[, fillchar]) -> string\n"
3235"\n"
3236"Return S centered in a string of length width. Padding is\n"
3237"done using the specified fill character (default is a space)");
3238
3239static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003240string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003241{
3242 Py_ssize_t marg, left;
3243 Py_ssize_t width;
3244 char fillchar = ' ';
3245
3246 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003247 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003248
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003249 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003250 Py_INCREF(self);
3251 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003252 }
3253
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003254 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003255 left = marg / 2 + (marg & width & 1);
3256
3257 return pad(self, left, marg - left, fillchar);
3258}
3259
3260PyDoc_STRVAR(zfill__doc__,
3261"S.zfill(width) -> string\n"
3262"\n"
3263"Pad a numeric string S with zeros on the left, to fill a field\n"
3264"of the specified width. The string S is never truncated.");
3265
3266static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003267string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003268{
3269 Py_ssize_t fill;
3270 PyObject *s;
3271 char *p;
3272 Py_ssize_t width;
3273
3274 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003275 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003276
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003277 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003278 if (PyString_CheckExact(self)) {
3279 Py_INCREF(self);
3280 return (PyObject*) self;
3281 }
3282 else
3283 return PyString_FromStringAndSize(
3284 PyString_AS_STRING(self),
3285 PyString_GET_SIZE(self)
3286 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003287 }
3288
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003289 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003290
Christian Heimes44720832008-05-26 13:01:01 +00003291 s = pad(self, fill, 0, '0');
3292
3293 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003294 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003295
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003296 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003297 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003298 /* move sign to beginning of string */
3299 p[0] = p[fill];
3300 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003301 }
3302
3303 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003304}
3305
Christian Heimes44720832008-05-26 13:01:01 +00003306PyDoc_STRVAR(isspace__doc__,
3307"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003308\n\
Christian Heimes44720832008-05-26 13:01:01 +00003309Return True if all characters in S are whitespace\n\
3310and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003311
Christian Heimes44720832008-05-26 13:01:01 +00003312static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003313string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003314{
Christian Heimes44720832008-05-26 13:01:01 +00003315 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003316 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003317 register const unsigned char *e;
3318
3319 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003320 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003321 isspace(*p))
3322 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003323
3324 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003325 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003326 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003327
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003328 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003329 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003330 if (!isspace(*p))
3331 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003332 }
Christian Heimes44720832008-05-26 13:01:01 +00003333 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003334}
3335
Christian Heimes44720832008-05-26 13:01:01 +00003336
3337PyDoc_STRVAR(isalpha__doc__,
3338"S.isalpha() -> bool\n\
3339\n\
3340Return True if all characters in S are alphabetic\n\
3341and there is at least one character in S, False otherwise.");
3342
3343static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003344string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003345{
Christian Heimes44720832008-05-26 13:01:01 +00003346 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003347 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003348 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003349
Christian Heimes44720832008-05-26 13:01:01 +00003350 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003351 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003352 isalpha(*p))
3353 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003354
3355 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003356 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003357 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003358
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003359 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003360 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003361 if (!isalpha(*p))
3362 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003363 }
Christian Heimes44720832008-05-26 13:01:01 +00003364 return PyBool_FromLong(1);
3365}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003366
Christian Heimes44720832008-05-26 13:01:01 +00003367
3368PyDoc_STRVAR(isalnum__doc__,
3369"S.isalnum() -> bool\n\
3370\n\
3371Return True if all characters in S are alphanumeric\n\
3372and there is at least one character in S, False otherwise.");
3373
3374static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003375string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003376{
3377 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003378 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003379 register const unsigned char *e;
3380
3381 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003382 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003383 isalnum(*p))
3384 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003385
3386 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003387 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003388 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003389
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003390 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003391 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003392 if (!isalnum(*p))
3393 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003394 }
3395 return PyBool_FromLong(1);
3396}
3397
3398
3399PyDoc_STRVAR(isdigit__doc__,
3400"S.isdigit() -> bool\n\
3401\n\
3402Return True if all characters in S are digits\n\
3403and there is at least one character in S, False otherwise.");
3404
3405static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003406string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003407{
3408 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003409 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003410 register const unsigned char *e;
3411
3412 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003413 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003414 isdigit(*p))
3415 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003416
3417 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003418 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003419 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003420
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003421 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003422 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003423 if (!isdigit(*p))
3424 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003425 }
3426 return PyBool_FromLong(1);
3427}
3428
3429
3430PyDoc_STRVAR(islower__doc__,
3431"S.islower() -> bool\n\
3432\n\
3433Return True if all cased characters in S are lowercase and there is\n\
3434at least one cased character in S, False otherwise.");
3435
3436static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003437string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003438{
3439 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003440 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003441 register const unsigned char *e;
3442 int cased;
3443
3444 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003445 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003446 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003447
3448 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003449 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003450 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003451
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003452 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003453 cased = 0;
3454 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003455 if (isupper(*p))
3456 return PyBool_FromLong(0);
3457 else if (!cased && islower(*p))
3458 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003459 }
3460 return PyBool_FromLong(cased);
3461}
3462
3463
3464PyDoc_STRVAR(isupper__doc__,
3465"S.isupper() -> bool\n\
3466\n\
3467Return True if all cased characters in S are uppercase and there is\n\
3468at least one cased character in S, False otherwise.");
3469
3470static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003471string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003472{
3473 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003474 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003475 register const unsigned char *e;
3476 int cased;
3477
3478 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003479 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003480 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003481
3482 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003483 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003484 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003485
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003486 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003487 cased = 0;
3488 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003489 if (islower(*p))
3490 return PyBool_FromLong(0);
3491 else if (!cased && isupper(*p))
3492 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003493 }
3494 return PyBool_FromLong(cased);
3495}
3496
3497
3498PyDoc_STRVAR(istitle__doc__,
3499"S.istitle() -> bool\n\
3500\n\
3501Return True if S is a titlecased string and there is at least one\n\
3502character in S, i.e. uppercase characters may only follow uncased\n\
3503characters and lowercase characters only cased ones. Return False\n\
3504otherwise.");
3505
3506static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003507string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003508{
3509 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003510 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003511 register const unsigned char *e;
3512 int cased, previous_is_cased;
3513
3514 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003515 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003516 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003517
3518 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003519 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003520 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003521
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003522 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003523 cased = 0;
3524 previous_is_cased = 0;
3525 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003526 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003527
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003528 if (isupper(ch)) {
3529 if (previous_is_cased)
3530 return PyBool_FromLong(0);
3531 previous_is_cased = 1;
3532 cased = 1;
3533 }
3534 else if (islower(ch)) {
3535 if (!previous_is_cased)
3536 return PyBool_FromLong(0);
3537 previous_is_cased = 1;
3538 cased = 1;
3539 }
3540 else
3541 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003542 }
3543 return PyBool_FromLong(cased);
3544}
3545
3546
3547PyDoc_STRVAR(splitlines__doc__,
Raymond Hettingeraad5b022012-06-02 01:42:58 -04003548"S.splitlines(keepends=False) -> list of strings\n\
Christian Heimes44720832008-05-26 13:01:01 +00003549\n\
3550Return a list of the lines in S, breaking at line boundaries.\n\
3551Line breaks are not included in the resulting list unless keepends\n\
3552is given and true.");
3553
3554static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003555string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003556{
Christian Heimes44720832008-05-26 13:01:01 +00003557 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003558
3559 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003560 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003561
Antoine Pitrou64672132010-01-13 07:55:48 +00003562 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003563 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3564 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003565 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003566}
3567
Robert Schuppenies51df0642008-06-01 16:16:17 +00003568PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003569"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003570
3571static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003572string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003573{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003574 Py_ssize_t res;
3575 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3576 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003577}
3578
Christian Heimes1a6387e2008-03-26 12:49:49 +00003579static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003580string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003581{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003582 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003583}
3584
Christian Heimes1a6387e2008-03-26 12:49:49 +00003585
Christian Heimes44720832008-05-26 13:01:01 +00003586#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003587
Christian Heimes44720832008-05-26 13:01:01 +00003588PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003589"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003590\n\
Eric Smith6c840852010-11-06 19:43:44 +00003591Return a formatted version of S, using substitutions from args and kwargs.\n\
3592The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003593
Eric Smithdc13b792008-05-30 18:10:04 +00003594static PyObject *
3595string__format__(PyObject* self, PyObject* args)
3596{
3597 PyObject *format_spec;
3598 PyObject *result = NULL;
3599 PyObject *tmp = NULL;
3600
3601 /* If 2.x, convert format_spec to the same type as value */
3602 /* This is to allow things like u''.format('') */
3603 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003604 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003605 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003606 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3607 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3608 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003609 }
3610 tmp = PyObject_Str(format_spec);
3611 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003612 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003613 format_spec = tmp;
3614
3615 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003616 PyString_AS_STRING(format_spec),
3617 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003618done:
3619 Py_XDECREF(tmp);
3620 return result;
3621}
3622
Christian Heimes44720832008-05-26 13:01:01 +00003623PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003624"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003625\n\
Eric Smith6c840852010-11-06 19:43:44 +00003626Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003627
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003628
Christian Heimes1a6387e2008-03-26 12:49:49 +00003629static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003630string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003631 /* Counterparts of the obsolete stropmodule functions; except
3632 string.maketrans(). */
3633 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3634 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3635 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3636 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3637 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3638 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3639 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3640 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3641 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3642 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3643 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3644 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3645 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3646 capitalize__doc__},
3647 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3648 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3649 endswith__doc__},
3650 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3651 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3652 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3653 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3654 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3655 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3656 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3657 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3658 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3659 rpartition__doc__},
3660 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3661 startswith__doc__},
3662 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3663 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3664 swapcase__doc__},
3665 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3666 translate__doc__},
3667 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3668 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3669 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3670 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3671 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3672 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3673 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3674 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3675 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3676 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3677 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3678 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3679 expandtabs__doc__},
3680 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3681 splitlines__doc__},
3682 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3683 sizeof__doc__},
3684 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3685 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003686};
3687
3688static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003689str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003690
Christian Heimes44720832008-05-26 13:01:01 +00003691static PyObject *
3692string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3693{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003694 PyObject *x = NULL;
3695 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003696
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003697 if (type != &PyString_Type)
3698 return str_subtype_new(type, args, kwds);
3699 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3700 return NULL;
3701 if (x == NULL)
3702 return PyString_FromString("");
3703 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003704}
3705
3706static PyObject *
3707str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3708{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003709 PyObject *tmp, *pnew;
3710 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003711
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003712 assert(PyType_IsSubtype(type, &PyString_Type));
3713 tmp = string_new(&PyString_Type, args, kwds);
3714 if (tmp == NULL)
3715 return NULL;
3716 assert(PyString_CheckExact(tmp));
3717 n = PyString_GET_SIZE(tmp);
3718 pnew = type->tp_alloc(type, n);
3719 if (pnew != NULL) {
3720 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3721 ((PyStringObject *)pnew)->ob_shash =
3722 ((PyStringObject *)tmp)->ob_shash;
3723 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3724 }
3725 Py_DECREF(tmp);
3726 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003727}
3728
3729static PyObject *
3730basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3731{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003732 PyErr_SetString(PyExc_TypeError,
3733 "The basestring type cannot be instantiated");
3734 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003735}
3736
3737static PyObject *
3738string_mod(PyObject *v, PyObject *w)
3739{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003740 if (!PyString_Check(v)) {
3741 Py_INCREF(Py_NotImplemented);
3742 return Py_NotImplemented;
3743 }
3744 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003745}
3746
/* Docstring used as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
3749
/* Number protocol table for str.  Only nb_remainder is filled in (with
   string_mod), all preceding slots are explicitly 0 and the remaining
   slots are left to the implicit zero-initialization of the aggregate. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
3757
3758
/* Type object for `basestring`.
   Nearly every slot is 0; the type derives from PyBaseObject_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE) and its tp_new is basestring_new, which
   always raises TypeError.  The three unlabeled leading entries are the
   type name followed by tp_basicsize and tp_itemsize. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",
    0,
    0,
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3800
/* Docstring used as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object='') -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
3806
/* Type object for `str`.
   Instances are variable-sized: the basic size is PyStringObject_SIZE and
   each item is one char.  The type derives from PyBaseString_Type, is
   created through string_new and released through PyObject_Del.  The three
   unlabeled leading entries are the type name followed by tp_basicsize and
   tp_itemsize. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",
    PyStringObject_SIZE,
    sizeof(char),
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3850
3851void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003852PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003853{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003854 register PyObject *v;
3855 if (*pv == NULL)
3856 return;
3857 if (w == NULL || !PyString_Check(*pv)) {
3858 Py_DECREF(*pv);
3859 *pv = NULL;
3860 return;
3861 }
3862 v = string_concat((PyStringObject *) *pv, w);
3863 Py_DECREF(*pv);
3864 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003865}
3866
3867void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003868PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003869{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003870 PyString_Concat(pv, w);
3871 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003872}
3873
3874
3875/* The following function breaks the notion that strings are immutable:
3876 it changes the size of a string. We get away with this only if there
3877 is only one module referencing the object. You can also think of it
3878 as creating a new string object and destroying the old one, only
3879 more efficiently. In any case, don't use this if the string may
3880 already be known to some other part of the code...
3881 Note that if there's not enough memory to resize the string, the original
3882 string object at *pv is deallocated, *pv is set to NULL, an "out of
3883 memory" exception is set, and -1 is returned. Else (on success) 0 is
3884 returned, and the value in *pv may or may not be the same as on input.
3885 As always, an extra byte is allocated for a trailing \0 byte (newsize
3886 does *not* include that), and a trailing \0 byte is stored.
3887*/
3888
3889int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003890_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003891{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003892 register PyObject *v;
3893 register PyStringObject *sv;
3894 v = *pv;
3895 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3896 PyString_CHECK_INTERNED(v)) {
3897 *pv = 0;
3898 Py_DECREF(v);
3899 PyErr_BadInternalCall();
3900 return -1;
3901 }
3902 /* XXX UNREF/NEWREF interface should be more symmetrical */
3903 _Py_DEC_REFTOTAL;
3904 _Py_ForgetReference(v);
3905 *pv = (PyObject *)
3906 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3907 if (*pv == NULL) {
3908 PyObject_Del(v);
3909 PyErr_NoMemory();
3910 return -1;
3911 }
3912 _Py_NewReference(*pv);
3913 sv = (PyStringObject *) *pv;
3914 Py_SIZE(sv) = newsize;
3915 sv->ob_sval[newsize] = '\0';
3916 sv->ob_shash = -1; /* invalidate cached hash value */
3917 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003918}
3919
3920/* Helpers for formatstring */
3921
3922Py_LOCAL_INLINE(PyObject *)
3923getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3924{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003925 Py_ssize_t argidx = *p_argidx;
3926 if (argidx < arglen) {
3927 (*p_argidx)++;
3928 if (arglen < 0)
3929 return args;
3930 else
3931 return PyTuple_GetItem(args, argidx);
3932 }
3933 PyErr_SetString(PyExc_TypeError,
3934 "not enough arguments for format string");
3935 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003936}
3937
/* Format codes
 * Bit flags accumulated while parsing a '%' conversion specification,
 * listed with the flag character that sets each of them:
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
Christian Heimes44720832008-05-26 13:01:01 +00003950
Mark Dickinson18cfada2009-11-23 18:46:41 +00003951/* Returns a new reference to a PyString object, or NULL on failure. */
3952
3953static PyObject *
3954formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003955{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003956 char *p;
3957 PyObject *result;
3958 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003959
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003960 x = PyFloat_AsDouble(v);
3961 if (x == -1.0 && PyErr_Occurred()) {
3962 PyErr_Format(PyExc_TypeError, "float argument required, "
3963 "not %.200s", Py_TYPE(v)->tp_name);
3964 return NULL;
3965 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003966
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003967 if (prec < 0)
3968 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003969
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003970 p = PyOS_double_to_string(x, type, prec,
3971 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003972
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003973 if (p == NULL)
3974 return NULL;
3975 result = PyString_FromStringAndSize(p, strlen(p));
3976 PyMem_Free(p);
3977 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003978}
3979
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003980/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003981 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3982 * Python's regular ints.
3983 * Return value: a new PyString*, or NULL if error.
3984 * . *pbuf is set to point into it,
3985 * *plen set to the # of chars following that.
3986 * Caller must decref it when done using pbuf.
3987 * The string starting at *pbuf is of the form
3988 * "-"? ("0x" | "0X")? digit+
3989 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3990 * set in flags. The case of hex digits will be correct,
3991 * There will be at least prec digits, zero-filled on the left if
3992 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003993 * val object to be converted
3994 * flags bitmask of format flags; only F_ALT is looked at
3995 * prec minimum number of digits; 0-fill on left if needed
3996 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003997 *
3998 * CAUTION: o, x and X conversions on regular ints can never
3999 * produce a '-' sign, but can for Python's unbounded ints.
4000 */
4001PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004002_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004003 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00004004{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004005 PyObject *result = NULL;
4006 char *buf;
4007 Py_ssize_t i;
4008 int sign; /* 1 if '-', else 0 */
4009 int len; /* number of characters */
4010 Py_ssize_t llen;
4011 int numdigits; /* len == numnondigits + numdigits */
4012 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004013
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004014 switch (type) {
4015 case 'd':
4016 case 'u':
4017 result = Py_TYPE(val)->tp_str(val);
4018 break;
4019 case 'o':
4020 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4021 break;
4022 case 'x':
4023 case 'X':
4024 numnondigits = 2;
4025 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4026 break;
4027 default:
4028 assert(!"'type' not in [duoxX]");
4029 }
4030 if (!result)
4031 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004032
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004033 buf = PyString_AsString(result);
4034 if (!buf) {
4035 Py_DECREF(result);
4036 return NULL;
4037 }
Christian Heimes44720832008-05-26 13:01:01 +00004038
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004039 /* To modify the string in-place, there can only be one reference. */
4040 if (Py_REFCNT(result) != 1) {
4041 PyErr_BadInternalCall();
4042 return NULL;
4043 }
4044 llen = PyString_Size(result);
4045 if (llen > INT_MAX) {
4046 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4047 return NULL;
4048 }
4049 len = (int)llen;
4050 if (buf[len-1] == 'L') {
4051 --len;
4052 buf[len] = '\0';
4053 }
4054 sign = buf[0] == '-';
4055 numnondigits += sign;
4056 numdigits = len - numnondigits;
4057 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004058
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004059 /* Get rid of base marker unless F_ALT */
4060 if ((flags & F_ALT) == 0) {
4061 /* Need to skip 0x, 0X or 0. */
4062 int skipped = 0;
4063 switch (type) {
4064 case 'o':
4065 assert(buf[sign] == '0');
4066 /* If 0 is only digit, leave it alone. */
4067 if (numdigits > 1) {
4068 skipped = 1;
4069 --numdigits;
4070 }
4071 break;
4072 case 'x':
4073 case 'X':
4074 assert(buf[sign] == '0');
4075 assert(buf[sign + 1] == 'x');
4076 skipped = 2;
4077 numnondigits -= 2;
4078 break;
4079 }
4080 if (skipped) {
4081 buf += skipped;
4082 len -= skipped;
4083 if (sign)
4084 buf[0] = '-';
4085 }
4086 assert(len == numnondigits + numdigits);
4087 assert(numdigits > 0);
4088 }
Christian Heimes44720832008-05-26 13:01:01 +00004089
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004090 /* Fill with leading zeroes to meet minimum width. */
4091 if (prec > numdigits) {
4092 PyObject *r1 = PyString_FromStringAndSize(NULL,
4093 numnondigits + prec);
4094 char *b1;
4095 if (!r1) {
4096 Py_DECREF(result);
4097 return NULL;
4098 }
4099 b1 = PyString_AS_STRING(r1);
4100 for (i = 0; i < numnondigits; ++i)
4101 *b1++ = *buf++;
4102 for (i = 0; i < prec - numdigits; i++)
4103 *b1++ = '0';
4104 for (i = 0; i < numdigits; i++)
4105 *b1++ = *buf++;
4106 *b1 = '\0';
4107 Py_DECREF(result);
4108 result = r1;
4109 buf = PyString_AS_STRING(result);
4110 len = numnondigits + prec;
4111 }
Christian Heimes44720832008-05-26 13:01:01 +00004112
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004113 /* Fix up case for hex conversions. */
4114 if (type == 'X') {
4115 /* Need to convert all lower case letters to upper case.
4116 and need to convert 0x to 0X (and -0x to -0X). */
4117 for (i = 0; i < len; i++)
4118 if (buf[i] >= 'a' && buf[i] <= 'x')
4119 buf[i] -= 'a'-'A';
4120 }
4121 *pbuf = buf;
4122 *plen = len;
4123 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004124}
4125
4126Py_LOCAL_INLINE(int)
4127formatint(char *buf, size_t buflen, int flags,
4128 int prec, int type, PyObject *v)
4129{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004130 /* fmt = '%#.' + `prec` + 'l' + `type`
4131 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4132 + 1 + 1 = 24 */
4133 char fmt[64]; /* plenty big enough! */
4134 char *sign;
4135 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004136
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004137 x = PyInt_AsLong(v);
4138 if (x == -1 && PyErr_Occurred()) {
4139 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4140 Py_TYPE(v)->tp_name);
4141 return -1;
4142 }
4143 if (x < 0 && type == 'u') {
4144 type = 'd';
4145 }
4146 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4147 sign = "-";
4148 else
4149 sign = "";
4150 if (prec < 0)
4151 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004152
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004153 if ((flags & F_ALT) &&
4154 (type == 'x' || type == 'X')) {
4155 /* When converting under %#x or %#X, there are a number
4156 * of issues that cause pain:
4157 * - when 0 is being converted, the C standard leaves off
4158 * the '0x' or '0X', which is inconsistent with other
4159 * %#x/%#X conversions and inconsistent with Python's
4160 * hex() function
4161 * - there are platforms that violate the standard and
4162 * convert 0 with the '0x' or '0X'
4163 * (Metrowerks, Compaq Tru64)
4164 * - there are platforms that give '0x' when converting
4165 * under %#X, but convert 0 in accordance with the
4166 * standard (OS/2 EMX)
4167 *
4168 * We can achieve the desired consistency by inserting our
4169 * own '0x' or '0X' prefix, and substituting %x/%X in place
4170 * of %#x/%#X.
4171 *
4172 * Note that this is the same approach as used in
4173 * formatint() in unicodeobject.c
4174 */
4175 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4176 sign, type, prec, type);
4177 }
4178 else {
4179 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4180 sign, (flags&F_ALT) ? "#" : "",
4181 prec, type);
4182 }
Christian Heimes44720832008-05-26 13:01:01 +00004183
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004184 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4185 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4186 */
4187 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4188 PyErr_SetString(PyExc_OverflowError,
4189 "formatted integer is too long (precision too large?)");
4190 return -1;
4191 }
4192 if (sign[0])
4193 PyOS_snprintf(buf, buflen, fmt, -x);
4194 else
4195 PyOS_snprintf(buf, buflen, fmt, x);
4196 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004197}
4198
4199Py_LOCAL_INLINE(int)
4200formatchar(char *buf, size_t buflen, PyObject *v)
4201{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004202 /* presume that the buffer is at least 2 characters long */
4203 if (PyString_Check(v)) {
4204 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4205 return -1;
4206 }
4207 else {
4208 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4209 return -1;
4210 }
4211 buf[1] = '\0';
4212 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004213}
4214
4215/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4216
Mark Dickinson18cfada2009-11-23 18:46:41 +00004217 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004218 chars are formatted. XXX This is a magic number. Each formatting
4219 routine does bounds checking to ensure no overflow, but a better
4220 solution may be to malloc a buffer of appropriate size for each
4221 format. For now, the current solution is sufficient.
4222*/
4223#define FORMATBUFLEN (size_t)120
4224
4225PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004226PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004227{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004228 char *fmt, *res;
4229 Py_ssize_t arglen, argidx;
4230 Py_ssize_t reslen, rescnt, fmtcnt;
4231 int args_owned = 0;
4232 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004233#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004234 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004235#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004236 PyObject *dict = NULL;
4237 if (format == NULL || !PyString_Check(format) || args == NULL) {
4238 PyErr_BadInternalCall();
4239 return NULL;
4240 }
4241 orig_args = args;
4242 fmt = PyString_AS_STRING(format);
4243 fmtcnt = PyString_GET_SIZE(format);
4244 reslen = rescnt = fmtcnt + 100;
4245 result = PyString_FromStringAndSize((char *)NULL, reslen);
4246 if (result == NULL)
4247 return NULL;
4248 res = PyString_AsString(result);
4249 if (PyTuple_Check(args)) {
4250 arglen = PyTuple_GET_SIZE(args);
4251 argidx = 0;
4252 }
4253 else {
4254 arglen = -1;
4255 argidx = -2;
4256 }
Benjamin Peterson23d49d32012-08-28 17:55:35 -04004257 if (PyMapping_Check(args) && !PyTuple_Check(args) &&
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004258 !PyObject_TypeCheck(args, &PyBaseString_Type))
4259 dict = args;
4260 while (--fmtcnt >= 0) {
4261 if (*fmt != '%') {
4262 if (--rescnt < 0) {
4263 rescnt = fmtcnt + 100;
4264 reslen += rescnt;
4265 if (_PyString_Resize(&result, reslen))
4266 return NULL;
4267 res = PyString_AS_STRING(result)
4268 + reslen - rescnt;
4269 --rescnt;
4270 }
4271 *res++ = *fmt++;
4272 }
4273 else {
4274 /* Got a format specifier */
4275 int flags = 0;
4276 Py_ssize_t width = -1;
4277 int prec = -1;
4278 int c = '\0';
4279 int fill;
4280 int isnumok;
4281 PyObject *v = NULL;
4282 PyObject *temp = NULL;
4283 char *pbuf;
4284 int sign;
4285 Py_ssize_t len;
4286 char formatbuf[FORMATBUFLEN];
4287 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004288#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004289 char *fmt_start = fmt;
4290 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004291#endif
4292
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004293 fmt++;
4294 if (*fmt == '(') {
4295 char *keystart;
4296 Py_ssize_t keylen;
4297 PyObject *key;
4298 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004299
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004300 if (dict == NULL) {
4301 PyErr_SetString(PyExc_TypeError,
4302 "format requires a mapping");
4303 goto error;
4304 }
4305 ++fmt;
4306 --fmtcnt;
4307 keystart = fmt;
4308 /* Skip over balanced parentheses */
4309 while (pcount > 0 && --fmtcnt >= 0) {
4310 if (*fmt == ')')
4311 --pcount;
4312 else if (*fmt == '(')
4313 ++pcount;
4314 fmt++;
4315 }
4316 keylen = fmt - keystart - 1;
4317 if (fmtcnt < 0 || pcount > 0) {
4318 PyErr_SetString(PyExc_ValueError,
4319 "incomplete format key");
4320 goto error;
4321 }
4322 key = PyString_FromStringAndSize(keystart,
4323 keylen);
4324 if (key == NULL)
4325 goto error;
4326 if (args_owned) {
4327 Py_DECREF(args);
4328 args_owned = 0;
4329 }
4330 args = PyObject_GetItem(dict, key);
4331 Py_DECREF(key);
4332 if (args == NULL) {
4333 goto error;
4334 }
4335 args_owned = 1;
4336 arglen = -1;
4337 argidx = -2;
4338 }
4339 while (--fmtcnt >= 0) {
4340 switch (c = *fmt++) {
4341 case '-': flags |= F_LJUST; continue;
4342 case '+': flags |= F_SIGN; continue;
4343 case ' ': flags |= F_BLANK; continue;
4344 case '#': flags |= F_ALT; continue;
4345 case '0': flags |= F_ZERO; continue;
4346 }
4347 break;
4348 }
4349 if (c == '*') {
4350 v = getnextarg(args, arglen, &argidx);
4351 if (v == NULL)
4352 goto error;
4353 if (!PyInt_Check(v)) {
4354 PyErr_SetString(PyExc_TypeError,
4355 "* wants int");
4356 goto error;
4357 }
4358 width = PyInt_AsLong(v);
4359 if (width < 0) {
4360 flags |= F_LJUST;
4361 width = -width;
4362 }
4363 if (--fmtcnt >= 0)
4364 c = *fmt++;
4365 }
4366 else if (c >= 0 && isdigit(c)) {
4367 width = c - '0';
4368 while (--fmtcnt >= 0) {
4369 c = Py_CHARMASK(*fmt++);
4370 if (!isdigit(c))
4371 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004372 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004373 PyErr_SetString(
4374 PyExc_ValueError,
4375 "width too big");
4376 goto error;
4377 }
4378 width = width*10 + (c - '0');
4379 }
4380 }
4381 if (c == '.') {
4382 prec = 0;
4383 if (--fmtcnt >= 0)
4384 c = *fmt++;
4385 if (c == '*') {
4386 v = getnextarg(args, arglen, &argidx);
4387 if (v == NULL)
4388 goto error;
4389 if (!PyInt_Check(v)) {
4390 PyErr_SetString(
4391 PyExc_TypeError,
4392 "* wants int");
4393 goto error;
4394 }
4395 prec = PyInt_AsLong(v);
4396 if (prec < 0)
4397 prec = 0;
4398 if (--fmtcnt >= 0)
4399 c = *fmt++;
4400 }
4401 else if (c >= 0 && isdigit(c)) {
4402 prec = c - '0';
4403 while (--fmtcnt >= 0) {
4404 c = Py_CHARMASK(*fmt++);
4405 if (!isdigit(c))
4406 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004407 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004408 PyErr_SetString(
4409 PyExc_ValueError,
4410 "prec too big");
4411 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004412 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004413 prec = prec*10 + (c - '0');
4414 }
4415 }
4416 } /* prec */
4417 if (fmtcnt >= 0) {
4418 if (c == 'h' || c == 'l' || c == 'L') {
4419 if (--fmtcnt >= 0)
4420 c = *fmt++;
4421 }
4422 }
4423 if (fmtcnt < 0) {
4424 PyErr_SetString(PyExc_ValueError,
4425 "incomplete format");
4426 goto error;
4427 }
4428 if (c != '%') {
4429 v = getnextarg(args, arglen, &argidx);
4430 if (v == NULL)
4431 goto error;
4432 }
4433 sign = 0;
4434 fill = ' ';
4435 switch (c) {
4436 case '%':
4437 pbuf = "%";
4438 len = 1;
4439 break;
4440 case 's':
4441#ifdef Py_USING_UNICODE
4442 if (PyUnicode_Check(v)) {
4443 fmt = fmt_start;
4444 argidx = argidx_start;
4445 goto unicode;
4446 }
4447#endif
4448 temp = _PyObject_Str(v);
4449#ifdef Py_USING_UNICODE
4450 if (temp != NULL && PyUnicode_Check(temp)) {
4451 Py_DECREF(temp);
4452 fmt = fmt_start;
4453 argidx = argidx_start;
4454 goto unicode;
4455 }
4456#endif
4457 /* Fall through */
4458 case 'r':
4459 if (c == 'r')
4460 temp = PyObject_Repr(v);
4461 if (temp == NULL)
4462 goto error;
4463 if (!PyString_Check(temp)) {
4464 PyErr_SetString(PyExc_TypeError,
4465 "%s argument has non-string str()");
4466 Py_DECREF(temp);
4467 goto error;
4468 }
4469 pbuf = PyString_AS_STRING(temp);
4470 len = PyString_GET_SIZE(temp);
4471 if (prec >= 0 && len > prec)
4472 len = prec;
4473 break;
4474 case 'i':
4475 case 'd':
4476 case 'u':
4477 case 'o':
4478 case 'x':
4479 case 'X':
4480 if (c == 'i')
4481 c = 'd';
4482 isnumok = 0;
4483 if (PyNumber_Check(v)) {
4484 PyObject *iobj=NULL;
4485
4486 if (PyInt_Check(v) || (PyLong_Check(v))) {
4487 iobj = v;
4488 Py_INCREF(iobj);
4489 }
4490 else {
4491 iobj = PyNumber_Int(v);
4492 if (iobj==NULL) iobj = PyNumber_Long(v);
4493 }
4494 if (iobj!=NULL) {
4495 if (PyInt_Check(iobj)) {
4496 isnumok = 1;
4497 pbuf = formatbuf;
4498 len = formatint(pbuf,
4499 sizeof(formatbuf),
4500 flags, prec, c, iobj);
4501 Py_DECREF(iobj);
4502 if (len < 0)
4503 goto error;
4504 sign = 1;
4505 }
4506 else if (PyLong_Check(iobj)) {
4507 int ilen;
4508
4509 isnumok = 1;
4510 temp = _PyString_FormatLong(iobj, flags,
4511 prec, c, &pbuf, &ilen);
4512 Py_DECREF(iobj);
4513 len = ilen;
4514 if (!temp)
4515 goto error;
4516 sign = 1;
4517 }
4518 else {
4519 Py_DECREF(iobj);
4520 }
4521 }
4522 }
4523 if (!isnumok) {
4524 PyErr_Format(PyExc_TypeError,
4525 "%%%c format: a number is required, "
4526 "not %.200s", c, Py_TYPE(v)->tp_name);
4527 goto error;
4528 }
4529 if (flags & F_ZERO)
4530 fill = '0';
4531 break;
4532 case 'e':
4533 case 'E':
4534 case 'f':
4535 case 'F':
4536 case 'g':
4537 case 'G':
4538 temp = formatfloat(v, flags, prec, c);
4539 if (temp == NULL)
4540 goto error;
4541 pbuf = PyString_AS_STRING(temp);
4542 len = PyString_GET_SIZE(temp);
4543 sign = 1;
4544 if (flags & F_ZERO)
4545 fill = '0';
4546 break;
4547 case 'c':
4548#ifdef Py_USING_UNICODE
4549 if (PyUnicode_Check(v)) {
4550 fmt = fmt_start;
4551 argidx = argidx_start;
4552 goto unicode;
4553 }
4554#endif
4555 pbuf = formatbuf;
4556 len = formatchar(pbuf, sizeof(formatbuf), v);
4557 if (len < 0)
4558 goto error;
4559 break;
4560 default:
4561 PyErr_Format(PyExc_ValueError,
4562 "unsupported format character '%c' (0x%x) "
4563 "at index %zd",
4564 c, c,
4565 (Py_ssize_t)(fmt - 1 -
4566 PyString_AsString(format)));
4567 goto error;
4568 }
4569 if (sign) {
4570 if (*pbuf == '-' || *pbuf == '+') {
4571 sign = *pbuf++;
4572 len--;
4573 }
4574 else if (flags & F_SIGN)
4575 sign = '+';
4576 else if (flags & F_BLANK)
4577 sign = ' ';
4578 else
4579 sign = 0;
4580 }
4581 if (width < len)
4582 width = len;
4583 if (rescnt - (sign != 0) < width) {
4584 reslen -= rescnt;
4585 rescnt = width + fmtcnt + 100;
4586 reslen += rescnt;
4587 if (reslen < 0) {
4588 Py_DECREF(result);
4589 Py_XDECREF(temp);
4590 return PyErr_NoMemory();
4591 }
4592 if (_PyString_Resize(&result, reslen)) {
4593 Py_XDECREF(temp);
4594 return NULL;
4595 }
4596 res = PyString_AS_STRING(result)
4597 + reslen - rescnt;
4598 }
4599 if (sign) {
4600 if (fill != ' ')
4601 *res++ = sign;
4602 rescnt--;
4603 if (width > len)
4604 width--;
4605 }
4606 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4607 assert(pbuf[0] == '0');
4608 assert(pbuf[1] == c);
4609 if (fill != ' ') {
4610 *res++ = *pbuf++;
4611 *res++ = *pbuf++;
4612 }
4613 rescnt -= 2;
4614 width -= 2;
4615 if (width < 0)
4616 width = 0;
4617 len -= 2;
4618 }
4619 if (width > len && !(flags & F_LJUST)) {
4620 do {
4621 --rescnt;
4622 *res++ = fill;
4623 } while (--width > len);
4624 }
4625 if (fill == ' ') {
4626 if (sign)
4627 *res++ = sign;
4628 if ((flags & F_ALT) &&
4629 (c == 'x' || c == 'X')) {
4630 assert(pbuf[0] == '0');
4631 assert(pbuf[1] == c);
4632 *res++ = *pbuf++;
4633 *res++ = *pbuf++;
4634 }
4635 }
4636 Py_MEMCPY(res, pbuf, len);
4637 res += len;
4638 rescnt -= len;
4639 while (--width >= len) {
4640 --rescnt;
4641 *res++ = ' ';
4642 }
4643 if (dict && (argidx < arglen) && c != '%') {
4644 PyErr_SetString(PyExc_TypeError,
4645 "not all arguments converted during string formatting");
4646 Py_XDECREF(temp);
4647 goto error;
4648 }
4649 Py_XDECREF(temp);
4650 } /* '%' */
4651 } /* until end */
4652 if (argidx < arglen && !dict) {
4653 PyErr_SetString(PyExc_TypeError,
4654 "not all arguments converted during string formatting");
4655 goto error;
4656 }
4657 if (args_owned) {
4658 Py_DECREF(args);
4659 }
4660 if (_PyString_Resize(&result, reslen - rescnt))
4661 return NULL;
4662 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004663
4664#ifdef Py_USING_UNICODE
4665 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004666 if (args_owned) {
4667 Py_DECREF(args);
4668 args_owned = 0;
4669 }
4670 /* Fiddle args right (remove the first argidx arguments) */
4671 if (PyTuple_Check(orig_args) && argidx > 0) {
4672 PyObject *v;
4673 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4674 v = PyTuple_New(n);
4675 if (v == NULL)
4676 goto error;
4677 while (--n >= 0) {
4678 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4679 Py_INCREF(w);
4680 PyTuple_SET_ITEM(v, n, w);
4681 }
4682 args = v;
4683 } else {
4684 Py_INCREF(orig_args);
4685 args = orig_args;
4686 }
4687 args_owned = 1;
4688 /* Take what we have of the result and let the Unicode formatting
4689 function format the rest of the input. */
4690 rescnt = res - PyString_AS_STRING(result);
4691 if (_PyString_Resize(&result, rescnt))
4692 goto error;
4693 fmtcnt = PyString_GET_SIZE(format) - \
4694 (fmt - PyString_AS_STRING(format));
4695 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4696 if (format == NULL)
4697 goto error;
4698 v = PyUnicode_Format(format, args);
4699 Py_DECREF(format);
4700 if (v == NULL)
4701 goto error;
4702 /* Paste what we have (result) to what the Unicode formatting
4703 function returned (v) and return the result (or error) */
4704 w = PyUnicode_Concat(result, v);
4705 Py_DECREF(result);
4706 Py_DECREF(v);
4707 Py_DECREF(args);
4708 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004709#endif /* Py_USING_UNICODE */
4710
4711 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004712 Py_DECREF(result);
4713 if (args_owned) {
4714 Py_DECREF(args);
4715 }
4716 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004717}
4718
4719void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004720PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004721{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004722 register PyStringObject *s = (PyStringObject *)(*p);
4723 PyObject *t;
4724 if (s == NULL || !PyString_Check(s))
4725 Py_FatalError("PyString_InternInPlace: strings only please!");
4726 /* If it's a string subclass, we don't really know what putting
4727 it in the interned dict might do. */
4728 if (!PyString_CheckExact(s))
4729 return;
4730 if (PyString_CHECK_INTERNED(s))
4731 return;
4732 if (interned == NULL) {
4733 interned = PyDict_New();
4734 if (interned == NULL) {
4735 PyErr_Clear(); /* Don't leave an exception */
4736 return;
4737 }
4738 }
4739 t = PyDict_GetItem(interned, (PyObject *)s);
4740 if (t) {
4741 Py_INCREF(t);
4742 Py_DECREF(*p);
4743 *p = t;
4744 return;
4745 }
Christian Heimes44720832008-05-26 13:01:01 +00004746
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004747 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4748 PyErr_Clear();
4749 return;
4750 }
4751 /* The two references in interned are not counted by refcnt.
4752 The string deallocator will take care of this */
4753 Py_REFCNT(s) -= 2;
4754 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004755}
4756
4757void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004758PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004759{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004760 PyString_InternInPlace(p);
4761 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4762 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4763 Py_INCREF(*p);
4764 }
Christian Heimes44720832008-05-26 13:01:01 +00004765}
4766
4767
4768PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004769PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004770{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004771 PyObject *s = PyString_FromString(cp);
4772 if (s == NULL)
4773 return NULL;
4774 PyString_InternInPlace(&s);
4775 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004776}
4777
4778void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004779PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004780{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004781 int i;
4782 for (i = 0; i < UCHAR_MAX + 1; i++) {
4783 Py_XDECREF(characters[i]);
4784 characters[i] = NULL;
4785 }
4786 Py_XDECREF(nullstring);
4787 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004788}
4789
4790void _Py_ReleaseInternedStrings(void)
4791{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004792 PyObject *keys;
4793 PyStringObject *s;
4794 Py_ssize_t i, n;
4795 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004796
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004797 if (interned == NULL || !PyDict_Check(interned))
4798 return;
4799 keys = PyDict_Keys(interned);
4800 if (keys == NULL || !PyList_Check(keys)) {
4801 PyErr_Clear();
4802 return;
4803 }
Christian Heimes44720832008-05-26 13:01:01 +00004804
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004805 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4806 detector, interned strings are not forcibly deallocated; rather, we
4807 give them their stolen references back, and then clear and DECREF
4808 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004809
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004810 n = PyList_GET_SIZE(keys);
4811 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4812 n);
4813 for (i = 0; i < n; i++) {
4814 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4815 switch (s->ob_sstate) {
4816 case SSTATE_NOT_INTERNED:
4817 /* XXX Shouldn't happen */
4818 break;
4819 case SSTATE_INTERNED_IMMORTAL:
4820 Py_REFCNT(s) += 1;
4821 immortal_size += Py_SIZE(s);
4822 break;
4823 case SSTATE_INTERNED_MORTAL:
4824 Py_REFCNT(s) += 2;
4825 mortal_size += Py_SIZE(s);
4826 break;
4827 default:
4828 Py_FatalError("Inconsistent interned string state.");
4829 }
4830 s->ob_sstate = SSTATE_NOT_INTERNED;
4831 }
4832 fprintf(stderr, "total size of all interned strings: "
4833 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4834 "mortal/immortal\n", mortal_size, immortal_size);
4835 Py_DECREF(keys);
4836 PyDict_Clear(interned);
4837 Py_DECREF(interned);
4838 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004839}