blob: 49d18645e61f637686e311582abe454055847662 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
#ifdef COUNT_ALLOCS
/* Statistics, maintained only when COUNT_ALLOCS is defined: how many times
   the cached empty string (null_strings) or a cached one-character string
   (one_strings) was handed out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of shared one-character strings, indexed by the byte value
   (`*str & UCHAR_MAX'); an entry stays NULL until that character is first
   created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25
/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyStringObject_SIZE + n bytes.

   Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
   3 bytes per string allocation on a typical system.

   The `+ 1' covers the terminating null byte that the constructors in this
   file store at ob_sval[n].
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000036 string containing exactly `size' bytes.
37
   For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000039 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000040 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000041 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000042 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000044 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
Eli Bendersky72de2052011-03-24 22:38:25 +020051 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000053 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000055*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000624#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000635
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
Christian Heimes44720832008-05-26 13:01:01 +0000653#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000654 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000655#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
Christian Heimes44720832008-05-26 13:01:01 +0000729#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000730 case 'u':
731 case 'U':
732 case 'N':
733 if (unicode) {
734 PyErr_SetString(PyExc_ValueError,
735 "Unicode escapes not legal "
736 "when Unicode disabled");
737 goto failed;
738 }
Christian Heimes44720832008-05-26 13:01:01 +0000739#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000740 default:
741 *p++ = '\\';
742 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200743 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 UTF-8 bytes may follow. */
745 }
746 }
747 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
748 goto failed;
749 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000750 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000751 Py_DECREF(v);
752 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000753}
754
755/* -------------------------------------------------------------------- */
756/* object api */
757
Christian Heimes1a6387e2008-03-26 12:49:49 +0000758static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000759string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000760{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000761 char *s;
762 Py_ssize_t len;
763 if (PyString_AsStringAndSize(op, &s, &len))
764 return -1;
765 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000766}
767
Christian Heimes44720832008-05-26 13:01:01 +0000768static /*const*/ char *
769string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000771 char *s;
772 Py_ssize_t len;
773 if (PyString_AsStringAndSize(op, &s, &len))
774 return NULL;
775 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000776}
777
778Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000779PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000781 if (!PyString_Check(op))
782 return string_getsize(op);
783 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784}
785
Christian Heimes44720832008-05-26 13:01:01 +0000786/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000787PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000789 if (!PyString_Check(op))
790 return string_getbuffer(op);
791 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792}
793
794int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000796 register char **s,
797 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000798{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000799 if (s == NULL) {
800 PyErr_BadInternalCall();
801 return -1;
802 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000803
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000804 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000805#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 if (PyUnicode_Check(obj)) {
807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
808 if (obj == NULL)
809 return -1;
810 }
811 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000812#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000813 {
814 PyErr_Format(PyExc_TypeError,
815 "expected string or Unicode object, "
816 "%.200s found", Py_TYPE(obj)->tp_name);
817 return -1;
818 }
819 }
Christian Heimes44720832008-05-26 13:01:01 +0000820
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000821 *s = PyString_AS_STRING(obj);
822 if (len != NULL)
823 *len = PyString_GET_SIZE(obj);
824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
825 PyErr_SetString(PyExc_TypeError,
826 "expected string without null bytes");
827 return -1;
828 }
829 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000830}
831
Christian Heimes1a6387e2008-03-26 12:49:49 +0000832/* -------------------------------------------------------------------- */
833/* Methods */
834
Christian Heimes44720832008-05-26 13:01:01 +0000835#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000837
Christian Heimes1a6387e2008-03-26 12:49:49 +0000838#include "stringlib/count.h"
839#include "stringlib/find.h"
840#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000841#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000843#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000844#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000845
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
847
848static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000849string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000851 Py_ssize_t i, str_len;
852 char c;
853 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 /* XXX Ought to check for interrupts when writing long strings */
856 if (! PyString_CheckExact(op)) {
857 int ret;
858 /* A str subclass may have its own __str__ method. */
859 op = (PyStringObject *) PyObject_Str((PyObject *)op);
860 if (op == NULL)
861 return -1;
862 ret = string_print(op, fp, flags);
863 Py_DECREF(op);
864 return ret;
865 }
866 if (flags & Py_PRINT_RAW) {
867 char *data = op->ob_sval;
868 Py_ssize_t size = Py_SIZE(op);
869 Py_BEGIN_ALLOW_THREADS
870 while (size > INT_MAX) {
871 /* Very long strings cannot be written atomically.
872 * But don't write exactly INT_MAX bytes at a time
873 * to avoid memory aligment issues.
874 */
875 const int chunk_size = INT_MAX & ~0x3FFF;
876 fwrite(data, 1, chunk_size, fp);
877 data += chunk_size;
878 size -= chunk_size;
879 }
Christian Heimes44720832008-05-26 13:01:01 +0000880#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000881 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000882#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000883 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000884#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000885 Py_END_ALLOW_THREADS
886 return 0;
887 }
Christian Heimes44720832008-05-26 13:01:01 +0000888
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 /* figure out which quote to use; single is preferred */
890 quote = '\'';
891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000894
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000895 str_len = Py_SIZE(op);
896 Py_BEGIN_ALLOW_THREADS
897 fputc(quote, fp);
898 for (i = 0; i < str_len; i++) {
899 /* Since strings are immutable and the caller should have a
900 reference, accessing the interal buffer should not be an issue
901 with the GIL released. */
902 c = op->ob_sval[i];
903 if (c == quote || c == '\\')
904 fprintf(fp, "\\%c", c);
905 else if (c == '\t')
906 fprintf(fp, "\\t");
907 else if (c == '\n')
908 fprintf(fp, "\\n");
909 else if (c == '\r')
910 fprintf(fp, "\\r");
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
913 else
914 fputc(c, fp);
915 }
916 fputc(quote, fp);
917 Py_END_ALLOW_THREADS
918 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000919}
920
Christian Heimes44720832008-05-26 13:01:01 +0000921PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000922PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000924 register PyStringObject* op = (PyStringObject*) obj;
925 size_t newsize = 2 + 4 * Py_SIZE(op);
926 PyObject *v;
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
928 PyErr_SetString(PyExc_OverflowError,
929 "string is too large to make repr");
930 return NULL;
931 }
932 v = PyString_FromStringAndSize((char *)NULL, newsize);
933 if (v == NULL) {
934 return NULL;
935 }
936 else {
937 register Py_ssize_t i;
938 register char c;
939 register char *p;
940 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000941
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000942 /* figure out which quote to use; single is preferred */
943 quote = '\'';
944 if (smartquotes &&
945 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
946 !memchr(op->ob_sval, '"', Py_SIZE(op)))
947 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000948
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000949 p = PyString_AS_STRING(v);
950 *p++ = quote;
951 for (i = 0; i < Py_SIZE(op); i++) {
952 /* There's at least enough room for a hex escape
953 and a closing quote. */
954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
955 c = op->ob_sval[i];
956 if (c == quote || c == '\\')
957 *p++ = '\\', *p++ = c;
958 else if (c == '\t')
959 *p++ = '\\', *p++ = 't';
960 else if (c == '\n')
961 *p++ = '\\', *p++ = 'n';
962 else if (c == '\r')
963 *p++ = '\\', *p++ = 'r';
964 else if (c < ' ' || c >= 0x7f) {
965 /* For performance, we don't want to call
966 PyOS_snprintf here (extra layers of
967 function call). */
968 sprintf(p, "\\x%02x", c & 0xff);
969 p += 4;
970 }
971 else
972 *p++ = c;
973 }
974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
975 *p++ = quote;
976 *p = '\0';
977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
978 return NULL;
979 return v;
980 }
Christian Heimes44720832008-05-26 13:01:01 +0000981}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000982
983static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000984string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000985{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000986 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000987}
988
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000990string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000992 assert(PyString_Check(s));
993 if (PyString_CheckExact(s)) {
994 Py_INCREF(s);
995 return s;
996 }
997 else {
998 /* Subtype -- return genuine string with the same value. */
999 PyStringObject *t = (PyStringObject *) s;
1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1001 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001002}
1003
Christian Heimes44720832008-05-26 13:01:01 +00001004static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001005string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001006{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001007 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001008}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001009
Christian Heimes44720832008-05-26 13:01:01 +00001010static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001011string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001012{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 register Py_ssize_t size;
1014 register PyStringObject *op;
1015 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001016#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 if (PyUnicode_Check(bb))
1018 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001019#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001020 if (PyByteArray_Check(bb))
1021 return PyByteArray_Concat((PyObject *)a, bb);
1022 PyErr_Format(PyExc_TypeError,
1023 "cannot concatenate 'str' and '%.200s' objects",
1024 Py_TYPE(bb)->tp_name);
1025 return NULL;
1026 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001027#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001028 /* Optimize cases with empty left or right operand */
1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1030 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1031 if (Py_SIZE(a) == 0) {
1032 Py_INCREF(bb);
1033 return bb;
1034 }
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 size = Py_SIZE(a) + Py_SIZE(b);
1039 /* Check that string sizes are not negative, to prevent an
1040 overflow in cases where we are passed incorrectly-created
1041 strings with negative lengths (due to a bug in other code).
1042 */
1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1045 PyErr_SetString(PyExc_OverflowError,
1046 "strings are too large to concat");
1047 return NULL;
1048 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001049
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001050 /* Inline PyObject_NewVar */
1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1052 PyErr_SetString(PyExc_OverflowError,
1053 "strings are too large to concat");
1054 return NULL;
1055 }
1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1057 if (op == NULL)
1058 return PyErr_NoMemory();
1059 PyObject_INIT_VAR(op, &PyString_Type, size);
1060 op->ob_shash = -1;
1061 op->ob_sstate = SSTATE_NOT_INTERNED;
1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1064 op->ob_sval[size] = '\0';
1065 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001066#undef b
1067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001070string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001071{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001072 register Py_ssize_t i;
1073 register Py_ssize_t j;
1074 register Py_ssize_t size;
1075 register PyStringObject *op;
1076 size_t nbytes;
1077 if (n < 0)
1078 n = 0;
1079 /* watch out for overflows: the size can overflow int,
1080 * and the # of bytes needed can overflow size_t
1081 */
1082 size = Py_SIZE(a) * n;
1083 if (n && size / n != Py_SIZE(a)) {
1084 PyErr_SetString(PyExc_OverflowError,
1085 "repeated string is too long");
1086 return NULL;
1087 }
1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1089 Py_INCREF(a);
1090 return (PyObject *)a;
1091 }
1092 nbytes = (size_t)size;
1093 if (nbytes + PyStringObject_SIZE <= nbytes) {
1094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1096 return NULL;
1097 }
1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1099 if (op == NULL)
1100 return PyErr_NoMemory();
1101 PyObject_INIT_VAR(op, &PyString_Type, size);
1102 op->ob_shash = -1;
1103 op->ob_sstate = SSTATE_NOT_INTERNED;
1104 op->ob_sval[size] = '\0';
1105 if (Py_SIZE(a) == 1 && n > 0) {
1106 memset(op->ob_sval, a->ob_sval[0] , n);
1107 return (PyObject *) op;
1108 }
1109 i = 0;
1110 if (i < size) {
1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1112 i = Py_SIZE(a);
1113 }
1114 while (i < size) {
1115 j = (i <= size-i) ? i : size-i;
1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1117 i += j;
1118 }
1119 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001120}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001121
Christian Heimes44720832008-05-26 13:01:01 +00001122/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1123
1124static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001125string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001126 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001127 /* j -- may be negative! */
1128{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001129 if (i < 0)
1130 i = 0;
1131 if (j < 0)
1132 j = 0; /* Avoid signed/unsigned bug in next line */
1133 if (j > Py_SIZE(a))
1134 j = Py_SIZE(a);
1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1136 /* It's the same as a */
1137 Py_INCREF(a);
1138 return (PyObject *)a;
1139 }
1140 if (j < i)
1141 j = i;
1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001143}
1144
1145static int
1146string_contains(PyObject *str_obj, PyObject *sub_obj)
1147{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001148 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001149#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 if (PyUnicode_Check(sub_obj))
1151 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001152#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001153 if (!PyString_Check(sub_obj)) {
1154 PyErr_Format(PyExc_TypeError,
1155 "'in <string>' requires string as left operand, "
1156 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1157 return -1;
1158 }
1159 }
Christian Heimes44720832008-05-26 13:01:01 +00001160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001161 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001162}
1163
1164static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001165string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001167 char pchar;
1168 PyObject *v;
1169 if (i < 0 || i >= Py_SIZE(a)) {
1170 PyErr_SetString(PyExc_IndexError, "string index out of range");
1171 return NULL;
1172 }
1173 pchar = a->ob_sval[i];
1174 v = (PyObject *)characters[pchar & UCHAR_MAX];
1175 if (v == NULL)
1176 v = PyString_FromStringAndSize(&pchar, 1);
1177 else {
Christian Heimes44720832008-05-26 13:01:01 +00001178#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001179 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001180#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 Py_INCREF(v);
1182 }
1183 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001184}
1185
1186static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001187string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001188{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001189 int c;
1190 Py_ssize_t len_a, len_b;
1191 Py_ssize_t min_len;
1192 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001193
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001194 /* Make sure both arguments are strings. */
1195 if (!(PyString_Check(a) && PyString_Check(b))) {
1196 result = Py_NotImplemented;
1197 goto out;
1198 }
1199 if (a == b) {
1200 switch (op) {
1201 case Py_EQ:case Py_LE:case Py_GE:
1202 result = Py_True;
1203 goto out;
1204 case Py_NE:case Py_LT:case Py_GT:
1205 result = Py_False;
1206 goto out;
1207 }
1208 }
1209 if (op == Py_EQ) {
1210 /* Supporting Py_NE here as well does not save
1211 much time, since Py_NE is rarely used. */
1212 if (Py_SIZE(a) == Py_SIZE(b)
1213 && (a->ob_sval[0] == b->ob_sval[0]
1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1215 result = Py_True;
1216 } else {
1217 result = Py_False;
1218 }
1219 goto out;
1220 }
1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1222 min_len = (len_a < len_b) ? len_a : len_b;
1223 if (min_len > 0) {
1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1225 if (c==0)
1226 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1227 } else
1228 c = 0;
1229 if (c == 0)
1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1231 switch (op) {
1232 case Py_LT: c = c < 0; break;
1233 case Py_LE: c = c <= 0; break;
1234 case Py_EQ: assert(0); break; /* unreachable */
1235 case Py_NE: c = c != 0; break;
1236 case Py_GT: c = c > 0; break;
1237 case Py_GE: c = c >= 0; break;
1238 default:
1239 result = Py_NotImplemented;
1240 goto out;
1241 }
1242 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001243 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001244 Py_INCREF(result);
1245 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001246}
1247
1248int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001249_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001250{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001251 PyStringObject *a = (PyStringObject*) o1;
1252 PyStringObject *b = (PyStringObject*) o2;
1253 return Py_SIZE(a) == Py_SIZE(b)
1254 && *a->ob_sval == *b->ob_sval
1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001256}
1257
1258static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001259string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001260{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001261 register Py_ssize_t len;
1262 register unsigned char *p;
1263 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001264
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001265 if (a->ob_shash != -1)
1266 return a->ob_shash;
1267 len = Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001268 /*
1269 We make the hash of the empty string be 0, rather than using
1270 (prefix ^ suffix), since this slightly obfuscates the hash secret
1271 */
1272 if (len == 0) {
1273 a->ob_shash = 0;
1274 return 0;
1275 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001276 p = (unsigned char *) a->ob_sval;
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001277 x = _Py_HashSecret.prefix;
1278 x ^= *p << 7;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001279 while (--len >= 0)
1280 x = (1000003*x) ^ *p++;
1281 x ^= Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001282 x ^= _Py_HashSecret.suffix;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001283 if (x == -1)
1284 x = -2;
1285 a->ob_shash = x;
1286 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001287}
1288
1289static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001290string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001291{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001292 if (PyIndex_Check(item)) {
1293 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1294 if (i == -1 && PyErr_Occurred())
1295 return NULL;
1296 if (i < 0)
1297 i += PyString_GET_SIZE(self);
1298 return string_item(self, i);
1299 }
1300 else if (PySlice_Check(item)) {
1301 Py_ssize_t start, stop, step, slicelength, cur, i;
1302 char* source_buf;
1303 char* result_buf;
1304 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001305
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001306 if (PySlice_GetIndicesEx((PySliceObject*)item,
1307 PyString_GET_SIZE(self),
1308 &start, &stop, &step, &slicelength) < 0) {
1309 return NULL;
1310 }
Christian Heimes44720832008-05-26 13:01:01 +00001311
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001312 if (slicelength <= 0) {
1313 return PyString_FromStringAndSize("", 0);
1314 }
1315 else if (start == 0 && step == 1 &&
1316 slicelength == PyString_GET_SIZE(self) &&
1317 PyString_CheckExact(self)) {
1318 Py_INCREF(self);
1319 return (PyObject *)self;
1320 }
1321 else if (step == 1) {
1322 return PyString_FromStringAndSize(
1323 PyString_AS_STRING(self) + start,
1324 slicelength);
1325 }
1326 else {
1327 source_buf = PyString_AsString((PyObject*)self);
1328 result_buf = (char *)PyMem_Malloc(slicelength);
1329 if (result_buf == NULL)
1330 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001331
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001332 for (cur = start, i = 0; i < slicelength;
1333 cur += step, i++) {
1334 result_buf[i] = source_buf[cur];
1335 }
Christian Heimes44720832008-05-26 13:01:01 +00001336
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001337 result = PyString_FromStringAndSize(result_buf,
1338 slicelength);
1339 PyMem_Free(result_buf);
1340 return result;
1341 }
1342 }
1343 else {
1344 PyErr_Format(PyExc_TypeError,
1345 "string indices must be integers, not %.200s",
1346 Py_TYPE(item)->tp_name);
1347 return NULL;
1348 }
Christian Heimes44720832008-05-26 13:01:01 +00001349}
1350
1351static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001352string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001353{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001354 if ( index != 0 ) {
1355 PyErr_SetString(PyExc_SystemError,
1356 "accessing non-existent string segment");
1357 return -1;
1358 }
1359 *ptr = (void *)self->ob_sval;
1360 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001361}
1362
1363static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001364string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001365{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001366 PyErr_SetString(PyExc_TypeError,
1367 "Cannot use string as modifiable buffer");
1368 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001369}
1370
1371static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001372string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001373{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001374 if ( lenp )
1375 *lenp = Py_SIZE(self);
1376 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001377}
1378
1379static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001380string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001381{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001382 if ( index != 0 ) {
1383 PyErr_SetString(PyExc_SystemError,
1384 "accessing non-existent string segment");
1385 return -1;
1386 }
1387 *ptr = self->ob_sval;
1388 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001389}
1390
1391static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001392string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001393{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001394 return PyBuffer_FillInfo(view, (PyObject*)self,
1395 (void *)self->ob_sval, Py_SIZE(self),
1396 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001397}
1398
1399static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001400 (lenfunc)string_length, /*sq_length*/
1401 (binaryfunc)string_concat, /*sq_concat*/
1402 (ssizeargfunc)string_repeat, /*sq_repeat*/
1403 (ssizeargfunc)string_item, /*sq_item*/
1404 (ssizessizeargfunc)string_slice, /*sq_slice*/
1405 0, /*sq_ass_item*/
1406 0, /*sq_ass_slice*/
1407 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001408};
1409
1410static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001411 (lenfunc)string_length,
1412 (binaryfunc)string_subscript,
1413 0,
Christian Heimes44720832008-05-26 13:01:01 +00001414};
1415
1416static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001417 (readbufferproc)string_buffer_getreadbuf,
1418 (writebufferproc)string_buffer_getwritebuf,
1419 (segcountproc)string_buffer_getsegcount,
1420 (charbufferproc)string_buffer_getcharbuf,
1421 (getbufferproc)string_buffer_getbuffer,
1422 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001423};
1424
1425
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001426
Christian Heimes44720832008-05-26 13:01:01 +00001427#define LEFTSTRIP 0
1428#define RIGHTSTRIP 1
1429#define BOTHSTRIP 2
1430
1431/* Arrays indexed by above */
1432static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1433
1434#define STRIPNAME(i) (stripformat[i]+3)
1435
Christian Heimes1a6387e2008-03-26 12:49:49 +00001436PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001437"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001438\n\
Christian Heimes44720832008-05-26 13:01:01 +00001439Return a list of the words in the string S, using sep as the\n\
1440delimiter string. If maxsplit is given, at most maxsplit\n\
1441splits are done. If sep is not specified or is None, any\n\
1442whitespace string is a separator and empty strings are removed\n\
1443from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001444
1445static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001446string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001447{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001448 Py_ssize_t len = PyString_GET_SIZE(self), n;
1449 Py_ssize_t maxsplit = -1;
1450 const char *s = PyString_AS_STRING(self), *sub;
1451 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001452
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001453 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1454 return NULL;
1455 if (maxsplit < 0)
1456 maxsplit = PY_SSIZE_T_MAX;
1457 if (subobj == Py_None)
1458 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1459 if (PyString_Check(subobj)) {
1460 sub = PyString_AS_STRING(subobj);
1461 n = PyString_GET_SIZE(subobj);
1462 }
Christian Heimes44720832008-05-26 13:01:01 +00001463#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001464 else if (PyUnicode_Check(subobj))
1465 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001466#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001467 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1468 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001469
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001470 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001471}
1472
1473PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001474"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001475\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001476Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001477the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001478found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001479
1480static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001481string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001482{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001483 const char *sep;
1484 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001485
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001486 if (PyString_Check(sep_obj)) {
1487 sep = PyString_AS_STRING(sep_obj);
1488 sep_len = PyString_GET_SIZE(sep_obj);
1489 }
Christian Heimes44720832008-05-26 13:01:01 +00001490#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001491 else if (PyUnicode_Check(sep_obj))
1492 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001493#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001494 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1495 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001496
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001497 return stringlib_partition(
1498 (PyObject*) self,
1499 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1500 sep_obj, sep, sep_len
1501 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001502}
1503
1504PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001505"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001506\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001507Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001508the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001509separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001510
1511static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001512string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001513{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001514 const char *sep;
1515 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001516
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001517 if (PyString_Check(sep_obj)) {
1518 sep = PyString_AS_STRING(sep_obj);
1519 sep_len = PyString_GET_SIZE(sep_obj);
1520 }
Christian Heimes44720832008-05-26 13:01:01 +00001521#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001522 else if (PyUnicode_Check(sep_obj))
1523 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001524#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001525 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1526 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001527
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001528 return stringlib_rpartition(
1529 (PyObject*) self,
1530 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1531 sep_obj, sep, sep_len
1532 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001533}
1534
Christian Heimes1a6387e2008-03-26 12:49:49 +00001535PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001536"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001537\n\
Christian Heimes44720832008-05-26 13:01:01 +00001538Return a list of the words in the string S, using sep as the\n\
1539delimiter string, starting at the end of the string and working\n\
1540to the front. If maxsplit is given, at most maxsplit splits are\n\
1541done. If sep is not specified or is None, any whitespace string\n\
1542is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001543
1544static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001545string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001546{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001547 Py_ssize_t len = PyString_GET_SIZE(self), n;
1548 Py_ssize_t maxsplit = -1;
1549 const char *s = PyString_AS_STRING(self), *sub;
1550 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001551
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001552 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1553 return NULL;
1554 if (maxsplit < 0)
1555 maxsplit = PY_SSIZE_T_MAX;
1556 if (subobj == Py_None)
1557 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1558 if (PyString_Check(subobj)) {
1559 sub = PyString_AS_STRING(subobj);
1560 n = PyString_GET_SIZE(subobj);
1561 }
Christian Heimes44720832008-05-26 13:01:01 +00001562#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001563 else if (PyUnicode_Check(subobj))
1564 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001565#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001566 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1567 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001568
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001569 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001570}
1571
1572
1573PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001574"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001575\n\
1576Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001577iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001578
1579static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001580string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001581{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001582 char *sep = PyString_AS_STRING(self);
1583 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1584 PyObject *res = NULL;
1585 char *p;
1586 Py_ssize_t seqlen = 0;
1587 size_t sz = 0;
1588 Py_ssize_t i;
1589 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001590
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001591 seq = PySequence_Fast(orig, "");
1592 if (seq == NULL) {
1593 return NULL;
1594 }
Christian Heimes44720832008-05-26 13:01:01 +00001595
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001596 seqlen = PySequence_Size(seq);
1597 if (seqlen == 0) {
1598 Py_DECREF(seq);
1599 return PyString_FromString("");
1600 }
1601 if (seqlen == 1) {
1602 item = PySequence_Fast_GET_ITEM(seq, 0);
1603 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1604 Py_INCREF(item);
1605 Py_DECREF(seq);
1606 return item;
1607 }
1608 }
Christian Heimes44720832008-05-26 13:01:01 +00001609
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001610 /* There are at least two things to join, or else we have a subclass
1611 * of the builtin types in the sequence.
1612 * Do a pre-pass to figure out the total amount of space we'll
1613 * need (sz), see whether any argument is absurd, and defer to
1614 * the Unicode join if appropriate.
1615 */
1616 for (i = 0; i < seqlen; i++) {
1617 const size_t old_sz = sz;
1618 item = PySequence_Fast_GET_ITEM(seq, i);
1619 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001620#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001621 if (PyUnicode_Check(item)) {
1622 /* Defer to Unicode join.
1623 * CAUTION: There's no gurantee that the
1624 * original sequence can be iterated over
1625 * again, so we must pass seq here.
1626 */
1627 PyObject *result;
1628 result = PyUnicode_Join((PyObject *)self, seq);
1629 Py_DECREF(seq);
1630 return result;
1631 }
Christian Heimes44720832008-05-26 13:01:01 +00001632#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001633 PyErr_Format(PyExc_TypeError,
1634 "sequence item %zd: expected string,"
1635 " %.80s found",
1636 i, Py_TYPE(item)->tp_name);
1637 Py_DECREF(seq);
1638 return NULL;
1639 }
1640 sz += PyString_GET_SIZE(item);
1641 if (i != 0)
1642 sz += seplen;
1643 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1644 PyErr_SetString(PyExc_OverflowError,
1645 "join() result is too long for a Python string");
1646 Py_DECREF(seq);
1647 return NULL;
1648 }
1649 }
Christian Heimes44720832008-05-26 13:01:01 +00001650
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001651 /* Allocate result space. */
1652 res = PyString_FromStringAndSize((char*)NULL, sz);
1653 if (res == NULL) {
1654 Py_DECREF(seq);
1655 return NULL;
1656 }
Christian Heimes44720832008-05-26 13:01:01 +00001657
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001658 /* Catenate everything. */
1659 p = PyString_AS_STRING(res);
1660 for (i = 0; i < seqlen; ++i) {
1661 size_t n;
1662 item = PySequence_Fast_GET_ITEM(seq, i);
1663 n = PyString_GET_SIZE(item);
1664 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1665 p += n;
1666 if (i < seqlen - 1) {
1667 Py_MEMCPY(p, sep, seplen);
1668 p += seplen;
1669 }
1670 }
Christian Heimes44720832008-05-26 13:01:01 +00001671
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001672 Py_DECREF(seq);
1673 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001674}
1675
1676PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001677_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001678{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001679 assert(sep != NULL && PyString_Check(sep));
1680 assert(x != NULL);
1681 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001682}
1683
/* Helper macro to fix up start/end slice values against a length `len`:
 *   - end greater than len is clamped to len;
 *   - negative end has len added and is then clamped to 0;
 *   - negative start has len added and is then clamped to 0.
 * A start larger than len is left as is.
 *
 * `start` and `end` must be modifiable lvalues; every argument may be
 * evaluated more than once.  The body is wrapped in do { } while (0) so
 * the macro behaves as a single statement (e.g. under an unbraced `if`);
 * invoke it with a trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001698
1699Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001700string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001701{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001702 PyObject *subobj;
1703 const char *sub;
1704 Py_ssize_t sub_len;
1705 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00001706
Jesus Cea44e81682011-04-20 16:39:15 +02001707 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1708 args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001709 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001710
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001711 if (PyString_Check(subobj)) {
1712 sub = PyString_AS_STRING(subobj);
1713 sub_len = PyString_GET_SIZE(subobj);
1714 }
Christian Heimes44720832008-05-26 13:01:01 +00001715#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001716 else if (PyUnicode_Check(subobj))
1717 return PyUnicode_Find(
1718 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001719#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001720 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1721 /* XXX - the "expected a character buffer object" is pretty
1722 confusing for a non-expert. remap to something else ? */
1723 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001724
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001725 if (dir > 0)
1726 return stringlib_find_slice(
1727 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1728 sub, sub_len, start, end);
1729 else
1730 return stringlib_rfind_slice(
1731 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1732 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001733}
1734
1735
1736PyDoc_STRVAR(find__doc__,
1737"S.find(sub [,start [,end]]) -> int\n\
1738\n\
1739Return the lowest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001740such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001741arguments start and end are interpreted as in slice notation.\n\
1742\n\
1743Return -1 on failure.");
1744
1745static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001746string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001747{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001748 Py_ssize_t result = string_find_internal(self, args, +1);
1749 if (result == -2)
1750 return NULL;
1751 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001752}
1753
1754
1755PyDoc_STRVAR(index__doc__,
1756"S.index(sub [,start [,end]]) -> int\n\
1757\n\
1758Like S.find() but raise ValueError when the substring is not found.");
1759
1760static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001761string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001762{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001763 Py_ssize_t result = string_find_internal(self, args, +1);
1764 if (result == -2)
1765 return NULL;
1766 if (result == -1) {
1767 PyErr_SetString(PyExc_ValueError,
1768 "substring not found");
1769 return NULL;
1770 }
1771 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001772}
1773
1774
1775PyDoc_STRVAR(rfind__doc__,
1776"S.rfind(sub [,start [,end]]) -> int\n\
1777\n\
1778Return the highest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001779such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001780arguments start and end are interpreted as in slice notation.\n\
1781\n\
1782Return -1 on failure.");
1783
1784static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001785string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001786{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001787 Py_ssize_t result = string_find_internal(self, args, -1);
1788 if (result == -2)
1789 return NULL;
1790 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001791}
1792
1793
1794PyDoc_STRVAR(rindex__doc__,
1795"S.rindex(sub [,start [,end]]) -> int\n\
1796\n\
1797Like S.rfind() but raise ValueError when the substring is not found.");
1798
1799static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001800string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001801{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001802 Py_ssize_t result = string_find_internal(self, args, -1);
1803 if (result == -2)
1804 return NULL;
1805 if (result == -1) {
1806 PyErr_SetString(PyExc_ValueError,
1807 "substring not found");
1808 return NULL;
1809 }
1810 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001811}
1812
1813
1814Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001815do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001816{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001817 char *s = PyString_AS_STRING(self);
1818 Py_ssize_t len = PyString_GET_SIZE(self);
1819 char *sep = PyString_AS_STRING(sepobj);
1820 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1821 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001822
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001823 i = 0;
1824 if (striptype != RIGHTSTRIP) {
1825 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1826 i++;
1827 }
1828 }
Christian Heimes44720832008-05-26 13:01:01 +00001829
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001830 j = len;
1831 if (striptype != LEFTSTRIP) {
1832 do {
1833 j--;
1834 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1835 j++;
1836 }
Christian Heimes44720832008-05-26 13:01:01 +00001837
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001838 if (i == 0 && j == len && PyString_CheckExact(self)) {
1839 Py_INCREF(self);
1840 return (PyObject*)self;
1841 }
1842 else
1843 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001844}
1845
1846
1847Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001848do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001849{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001850 char *s = PyString_AS_STRING(self);
1851 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001852
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001853 i = 0;
1854 if (striptype != RIGHTSTRIP) {
1855 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1856 i++;
1857 }
1858 }
Christian Heimes44720832008-05-26 13:01:01 +00001859
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001860 j = len;
1861 if (striptype != LEFTSTRIP) {
1862 do {
1863 j--;
1864 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1865 j++;
1866 }
Christian Heimes44720832008-05-26 13:01:01 +00001867
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001868 if (i == 0 && j == len && PyString_CheckExact(self)) {
1869 Py_INCREF(self);
1870 return (PyObject*)self;
1871 }
1872 else
1873 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001874}
1875
1876
1877Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001878do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001879{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001880 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001881
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001882 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1883 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001884
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001885 if (sep != NULL && sep != Py_None) {
1886 if (PyString_Check(sep))
1887 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001888#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001889 else if (PyUnicode_Check(sep)) {
1890 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1891 PyObject *res;
1892 if (uniself==NULL)
1893 return NULL;
1894 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1895 striptype, sep);
1896 Py_DECREF(uniself);
1897 return res;
1898 }
Christian Heimes44720832008-05-26 13:01:01 +00001899#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001900 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001901#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001902 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001903#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001904 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001905#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001906 STRIPNAME(striptype));
1907 return NULL;
1908 }
Christian Heimes44720832008-05-26 13:01:01 +00001909
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001910 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001911}
1912
1913
1914PyDoc_STRVAR(strip__doc__,
1915"S.strip([chars]) -> string or unicode\n\
1916\n\
1917Return a copy of the string S with leading and trailing\n\
1918whitespace removed.\n\
1919If chars is given and not None, remove characters in chars instead.\n\
1920If chars is unicode, S will be converted to unicode before stripping");
1921
1922static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001923string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001924{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001925 if (PyTuple_GET_SIZE(args) == 0)
1926 return do_strip(self, BOTHSTRIP); /* Common case */
1927 else
1928 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001929}
1930
1931
1932PyDoc_STRVAR(lstrip__doc__,
1933"S.lstrip([chars]) -> string or unicode\n\
1934\n\
1935Return a copy of the string S with leading whitespace removed.\n\
1936If chars is given and not None, remove characters in chars instead.\n\
1937If chars is unicode, S will be converted to unicode before stripping");
1938
1939static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001940string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001941{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001942 if (PyTuple_GET_SIZE(args) == 0)
1943 return do_strip(self, LEFTSTRIP); /* Common case */
1944 else
1945 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001946}
1947
1948
1949PyDoc_STRVAR(rstrip__doc__,
1950"S.rstrip([chars]) -> string or unicode\n\
1951\n\
1952Return a copy of the string S with trailing whitespace removed.\n\
1953If chars is given and not None, remove characters in chars instead.\n\
1954If chars is unicode, S will be converted to unicode before stripping");
1955
1956static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001957string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001958{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001959 if (PyTuple_GET_SIZE(args) == 0)
1960 return do_strip(self, RIGHTSTRIP); /* Common case */
1961 else
1962 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001963}
1964
1965
1966PyDoc_STRVAR(lower__doc__,
1967"S.lower() -> string\n\
1968\n\
1969Return a copy of the string S converted to lowercase.");
1970
1971/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1972#ifndef _tolower
1973#define _tolower tolower
1974#endif
1975
1976static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001977string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001978{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001979 char *s;
1980 Py_ssize_t i, n = PyString_GET_SIZE(self);
1981 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001982
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001983 newobj = PyString_FromStringAndSize(NULL, n);
1984 if (!newobj)
1985 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001986
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001987 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001988
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001989 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001990
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001991 for (i = 0; i < n; i++) {
1992 int c = Py_CHARMASK(s[i]);
1993 if (isupper(c))
1994 s[i] = _tolower(c);
1995 }
Christian Heimes44720832008-05-26 13:01:01 +00001996
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001997 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001998}
1999
2000PyDoc_STRVAR(upper__doc__,
2001"S.upper() -> string\n\
2002\n\
2003Return a copy of the string S converted to uppercase.");
2004
2005#ifndef _toupper
2006#define _toupper toupper
2007#endif
2008
2009static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002010string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002011{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002012 char *s;
2013 Py_ssize_t i, n = PyString_GET_SIZE(self);
2014 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002015
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002016 newobj = PyString_FromStringAndSize(NULL, n);
2017 if (!newobj)
2018 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002019
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002020 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002021
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002022 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002023
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002024 for (i = 0; i < n; i++) {
2025 int c = Py_CHARMASK(s[i]);
2026 if (islower(c))
2027 s[i] = _toupper(c);
2028 }
Christian Heimes44720832008-05-26 13:01:01 +00002029
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002030 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002031}
2032
2033PyDoc_STRVAR(title__doc__,
2034"S.title() -> string\n\
2035\n\
2036Return a titlecased version of S, i.e. words start with uppercase\n\
2037characters, all remaining cased characters have lowercase.");
2038
2039static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002040string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002041{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002042 char *s = PyString_AS_STRING(self), *s_new;
2043 Py_ssize_t i, n = PyString_GET_SIZE(self);
2044 int previous_is_cased = 0;
2045 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002046
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002047 newobj = PyString_FromStringAndSize(NULL, n);
2048 if (newobj == NULL)
2049 return NULL;
2050 s_new = PyString_AsString(newobj);
2051 for (i = 0; i < n; i++) {
2052 int c = Py_CHARMASK(*s++);
2053 if (islower(c)) {
2054 if (!previous_is_cased)
2055 c = toupper(c);
2056 previous_is_cased = 1;
2057 } else if (isupper(c)) {
2058 if (previous_is_cased)
2059 c = tolower(c);
2060 previous_is_cased = 1;
2061 } else
2062 previous_is_cased = 0;
2063 *s_new++ = c;
2064 }
2065 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002066}
2067
2068PyDoc_STRVAR(capitalize__doc__,
2069"S.capitalize() -> string\n\
2070\n\
2071Return a copy of the string S with only its first character\n\
2072capitalized.");
2073
2074static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002075string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002076{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002077 char *s = PyString_AS_STRING(self), *s_new;
2078 Py_ssize_t i, n = PyString_GET_SIZE(self);
2079 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002080
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002081 newobj = PyString_FromStringAndSize(NULL, n);
2082 if (newobj == NULL)
2083 return NULL;
2084 s_new = PyString_AsString(newobj);
2085 if (0 < n) {
2086 int c = Py_CHARMASK(*s++);
2087 if (islower(c))
2088 *s_new = toupper(c);
2089 else
2090 *s_new = c;
2091 s_new++;
2092 }
2093 for (i = 1; i < n; i++) {
2094 int c = Py_CHARMASK(*s++);
2095 if (isupper(c))
2096 *s_new = tolower(c);
2097 else
2098 *s_new = c;
2099 s_new++;
2100 }
2101 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002102}
2103
2104
2105PyDoc_STRVAR(count__doc__,
2106"S.count(sub[, start[, end]]) -> int\n\
2107\n\
2108Return the number of non-overlapping occurrences of substring sub in\n\
2109string S[start:end]. Optional arguments start and end are interpreted\n\
2110as in slice notation.");
2111
2112static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002113string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002114{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002115 PyObject *sub_obj;
2116 const char *str = PyString_AS_STRING(self), *sub;
2117 Py_ssize_t sub_len;
2118 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002119
Jesus Cea44e81682011-04-20 16:39:15 +02002120 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002121 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002122
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002123 if (PyString_Check(sub_obj)) {
2124 sub = PyString_AS_STRING(sub_obj);
2125 sub_len = PyString_GET_SIZE(sub_obj);
2126 }
Christian Heimes44720832008-05-26 13:01:01 +00002127#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002128 else if (PyUnicode_Check(sub_obj)) {
2129 Py_ssize_t count;
2130 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2131 if (count == -1)
2132 return NULL;
2133 else
2134 return PyInt_FromSsize_t(count);
2135 }
Christian Heimes44720832008-05-26 13:01:01 +00002136#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002137 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2138 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002139
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002140 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002141
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002142 return PyInt_FromSsize_t(
2143 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2144 );
Christian Heimes44720832008-05-26 13:01:01 +00002145}
2146
2147PyDoc_STRVAR(swapcase__doc__,
2148"S.swapcase() -> string\n\
2149\n\
2150Return a copy of the string S with uppercase characters\n\
2151converted to lowercase and vice versa.");
2152
2153static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002154string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002155{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002156 char *s = PyString_AS_STRING(self), *s_new;
2157 Py_ssize_t i, n = PyString_GET_SIZE(self);
2158 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002159
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002160 newobj = PyString_FromStringAndSize(NULL, n);
2161 if (newobj == NULL)
2162 return NULL;
2163 s_new = PyString_AsString(newobj);
2164 for (i = 0; i < n; i++) {
2165 int c = Py_CHARMASK(*s++);
2166 if (islower(c)) {
2167 *s_new = toupper(c);
2168 }
2169 else if (isupper(c)) {
2170 *s_new = tolower(c);
2171 }
2172 else
2173 *s_new = c;
2174 s_new++;
2175 }
2176 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002177}
2178
2179
2180PyDoc_STRVAR(translate__doc__,
2181"S.translate(table [,deletechars]) -> string\n\
2182\n\
2183Return a copy of the string S, where all characters occurring\n\
2184in the optional argument deletechars are removed, and the\n\
2185remaining characters have been mapped through the given\n\
Mark Dickinsoncb9bf1a2011-06-25 11:00:12 +02002186translation table, which must be a string of length 256 or None.\n\
2187If the table argument is None, no translation is applied and\n\
2188the operation simply removes the characters in deletechars.");
Christian Heimes44720832008-05-26 13:01:01 +00002189
2190static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002191string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002192{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002193 register char *input, *output;
2194 const char *table;
2195 register Py_ssize_t i, c, changed = 0;
2196 PyObject *input_obj = (PyObject*)self;
2197 const char *output_start, *del_table=NULL;
2198 Py_ssize_t inlen, tablen, dellen = 0;
2199 PyObject *result;
2200 int trans_table[256];
2201 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002202
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002203 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2204 &tableobj, &delobj))
2205 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002206
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002207 if (PyString_Check(tableobj)) {
2208 table = PyString_AS_STRING(tableobj);
2209 tablen = PyString_GET_SIZE(tableobj);
2210 }
2211 else if (tableobj == Py_None) {
2212 table = NULL;
2213 tablen = 256;
2214 }
Christian Heimes44720832008-05-26 13:01:01 +00002215#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002216 else if (PyUnicode_Check(tableobj)) {
2217 /* Unicode .translate() does not support the deletechars
2218 parameter; instead a mapping to None will cause characters
2219 to be deleted. */
2220 if (delobj != NULL) {
2221 PyErr_SetString(PyExc_TypeError,
2222 "deletions are implemented differently for unicode");
2223 return NULL;
2224 }
2225 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2226 }
Christian Heimes44720832008-05-26 13:01:01 +00002227#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002228 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2229 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002230
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002231 if (tablen != 256) {
2232 PyErr_SetString(PyExc_ValueError,
2233 "translation table must be 256 characters long");
2234 return NULL;
2235 }
Christian Heimes44720832008-05-26 13:01:01 +00002236
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002237 if (delobj != NULL) {
2238 if (PyString_Check(delobj)) {
2239 del_table = PyString_AS_STRING(delobj);
2240 dellen = PyString_GET_SIZE(delobj);
2241 }
Christian Heimes44720832008-05-26 13:01:01 +00002242#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002243 else if (PyUnicode_Check(delobj)) {
2244 PyErr_SetString(PyExc_TypeError,
2245 "deletions are implemented differently for unicode");
2246 return NULL;
2247 }
Christian Heimes44720832008-05-26 13:01:01 +00002248#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002249 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2250 return NULL;
2251 }
2252 else {
2253 del_table = NULL;
2254 dellen = 0;
2255 }
Christian Heimes44720832008-05-26 13:01:01 +00002256
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002257 inlen = PyString_GET_SIZE(input_obj);
2258 result = PyString_FromStringAndSize((char *)NULL, inlen);
2259 if (result == NULL)
2260 return NULL;
2261 output_start = output = PyString_AsString(result);
2262 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002263
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002264 if (dellen == 0 && table != NULL) {
2265 /* If no deletions are required, use faster code */
2266 for (i = inlen; --i >= 0; ) {
2267 c = Py_CHARMASK(*input++);
2268 if (Py_CHARMASK((*output++ = table[c])) != c)
2269 changed = 1;
2270 }
2271 if (changed || !PyString_CheckExact(input_obj))
2272 return result;
2273 Py_DECREF(result);
2274 Py_INCREF(input_obj);
2275 return input_obj;
2276 }
Christian Heimes44720832008-05-26 13:01:01 +00002277
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002278 if (table == NULL) {
2279 for (i = 0; i < 256; i++)
2280 trans_table[i] = Py_CHARMASK(i);
2281 } else {
2282 for (i = 0; i < 256; i++)
2283 trans_table[i] = Py_CHARMASK(table[i]);
2284 }
Christian Heimes44720832008-05-26 13:01:01 +00002285
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002286 for (i = 0; i < dellen; i++)
2287 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002288
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002289 for (i = inlen; --i >= 0; ) {
2290 c = Py_CHARMASK(*input++);
2291 if (trans_table[c] != -1)
2292 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2293 continue;
2294 changed = 1;
2295 }
2296 if (!changed && PyString_CheckExact(input_obj)) {
2297 Py_DECREF(result);
2298 Py_INCREF(input_obj);
2299 return input_obj;
2300 }
2301 /* Fix the size of the resulting string */
2302 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2303 return NULL;
2304 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002305}
2306
2307
Christian Heimes44720832008-05-26 13:01:01 +00002308/* find and count characters and substrings */
2309
/* Return a char* to the first byte equal to `c` within the first
   `target_len` bytes of `target`, or NULL if there is none (memchr). */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2312
2313/* String ops must return a string. */
2314/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002315Py_LOCAL(PyStringObject *)
2316return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002317{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002318 if (PyString_CheckExact(self)) {
2319 Py_INCREF(self);
2320 return self;
2321 }
2322 return (PyStringObject *)PyString_FromStringAndSize(
2323 PyString_AS_STRING(self),
2324 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002325}
2326
2327Py_LOCAL_INLINE(Py_ssize_t)
2328countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2329{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002330 Py_ssize_t count=0;
2331 const char *start=target;
2332 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002333
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002334 while ( (start=findchar(start, end-start, c)) != NULL ) {
2335 count++;
2336 if (count >= maxcount)
2337 break;
2338 start += 1;
2339 }
2340 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002341}
2342
Christian Heimes44720832008-05-26 13:01:01 +00002343
2344/* Algorithms for different cases of string replacement */
2345
2346/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002347Py_LOCAL(PyStringObject *)
2348replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002349 const char *to_s, Py_ssize_t to_len,
2350 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002351{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002352 char *self_s, *result_s;
2353 Py_ssize_t self_len, result_len;
2354 Py_ssize_t count, i, product;
2355 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002356
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002357 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002358
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002359 /* 1 at the end plus 1 after every character */
2360 count = self_len+1;
2361 if (maxcount < count)
2362 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002363
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002364 /* Check for overflow */
2365 /* result_len = count * to_len + self_len; */
2366 product = count * to_len;
2367 if (product / to_len != count) {
2368 PyErr_SetString(PyExc_OverflowError,
2369 "replace string is too long");
2370 return NULL;
2371 }
2372 result_len = product + self_len;
2373 if (result_len < 0) {
2374 PyErr_SetString(PyExc_OverflowError,
2375 "replace string is too long");
2376 return NULL;
2377 }
Christian Heimes44720832008-05-26 13:01:01 +00002378
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002379 if (! (result = (PyStringObject *)
2380 PyString_FromStringAndSize(NULL, result_len)) )
2381 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002382
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002383 self_s = PyString_AS_STRING(self);
2384 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002385
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002386 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002387
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002388 /* Lay the first one down (guaranteed this will occur) */
2389 Py_MEMCPY(result_s, to_s, to_len);
2390 result_s += to_len;
2391 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002392
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002393 for (i=0; i<count; i++) {
2394 *result_s++ = *self_s++;
2395 Py_MEMCPY(result_s, to_s, to_len);
2396 result_s += to_len;
2397 }
2398
2399 /* Copy the rest of the original string */
2400 Py_MEMCPY(result_s, self_s, self_len-i);
2401
2402 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002403}
2404
2405/* Special case for deleting a single character */
2406/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002407Py_LOCAL(PyStringObject *)
2408replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002409 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002410{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002411 char *self_s, *result_s;
2412 char *start, *next, *end;
2413 Py_ssize_t self_len, result_len;
2414 Py_ssize_t count;
2415 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002416
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002417 self_len = PyString_GET_SIZE(self);
2418 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002419
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002420 count = countchar(self_s, self_len, from_c, maxcount);
2421 if (count == 0) {
2422 return return_self(self);
2423 }
Christian Heimes44720832008-05-26 13:01:01 +00002424
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002425 result_len = self_len - count; /* from_len == 1 */
2426 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002427
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002428 if ( (result = (PyStringObject *)
2429 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2430 return NULL;
2431 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002432
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002433 start = self_s;
2434 end = self_s + self_len;
2435 while (count-- > 0) {
2436 next = findchar(start, end-start, from_c);
2437 if (next == NULL)
2438 break;
2439 Py_MEMCPY(result_s, start, next-start);
2440 result_s += (next-start);
2441 start = next+1;
2442 }
2443 Py_MEMCPY(result_s, start, end-start);
2444
2445 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002446}
2447
2448/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2449
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002450Py_LOCAL(PyStringObject *)
2451replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002452 const char *from_s, Py_ssize_t from_len,
2453 Py_ssize_t maxcount) {
2454 char *self_s, *result_s;
2455 char *start, *next, *end;
2456 Py_ssize_t self_len, result_len;
2457 Py_ssize_t count, offset;
2458 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002459
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002460 self_len = PyString_GET_SIZE(self);
2461 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002462
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002463 count = stringlib_count(self_s, self_len,
2464 from_s, from_len,
2465 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002466
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002467 if (count == 0) {
2468 /* no matches */
2469 return return_self(self);
2470 }
Christian Heimes44720832008-05-26 13:01:01 +00002471
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002472 result_len = self_len - (count * from_len);
2473 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002474
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002475 if ( (result = (PyStringObject *)
2476 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2477 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002478
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002479 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002480
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002481 start = self_s;
2482 end = self_s + self_len;
2483 while (count-- > 0) {
2484 offset = stringlib_find(start, end-start,
2485 from_s, from_len,
2486 0);
2487 if (offset == -1)
2488 break;
2489 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002490
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002491 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002492
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002493 result_s += (next-start);
2494 start = next+from_len;
2495 }
2496 Py_MEMCPY(result_s, start, end-start);
2497 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002498}
2499
2500/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002501Py_LOCAL(PyStringObject *)
2502replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002503 char from_c, char to_c,
2504 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002505{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002506 char *self_s, *result_s, *start, *end, *next;
2507 Py_ssize_t self_len;
2508 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002509
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002510 /* The result string will be the same size */
2511 self_s = PyString_AS_STRING(self);
2512 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002513
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002514 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002515
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002516 if (next == NULL) {
2517 /* No matches; return the original string */
2518 return return_self(self);
2519 }
Christian Heimes44720832008-05-26 13:01:01 +00002520
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002521 /* Need to make a new string */
2522 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2523 if (result == NULL)
2524 return NULL;
2525 result_s = PyString_AS_STRING(result);
2526 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002527
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002528 /* change everything in-place, starting with this one */
2529 start = result_s + (next-self_s);
2530 *start = to_c;
2531 start++;
2532 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002533
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002534 while (--maxcount > 0) {
2535 next = findchar(start, end-start, from_c);
2536 if (next == NULL)
2537 break;
2538 *next = to_c;
2539 start = next+1;
2540 }
Christian Heimes44720832008-05-26 13:01:01 +00002541
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002542 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002543}
2544
2545/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002546Py_LOCAL(PyStringObject *)
2547replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002548 const char *from_s, Py_ssize_t from_len,
2549 const char *to_s, Py_ssize_t to_len,
2550 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002551{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002552 char *result_s, *start, *end;
2553 char *self_s;
2554 Py_ssize_t self_len, offset;
2555 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002556
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002557 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002558
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002559 self_s = PyString_AS_STRING(self);
2560 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002561
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002562 offset = stringlib_find(self_s, self_len,
2563 from_s, from_len,
2564 0);
2565 if (offset == -1) {
2566 /* No matches; return the original string */
2567 return return_self(self);
2568 }
Christian Heimes44720832008-05-26 13:01:01 +00002569
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002570 /* Need to make a new string */
2571 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2572 if (result == NULL)
2573 return NULL;
2574 result_s = PyString_AS_STRING(result);
2575 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002576
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002577 /* change everything in-place, starting with this one */
2578 start = result_s + offset;
2579 Py_MEMCPY(start, to_s, from_len);
2580 start += from_len;
2581 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002582
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002583 while ( --maxcount > 0) {
2584 offset = stringlib_find(start, end-start,
2585 from_s, from_len,
2586 0);
2587 if (offset==-1)
2588 break;
2589 Py_MEMCPY(start+offset, to_s, from_len);
2590 start += offset+from_len;
2591 }
Christian Heimes44720832008-05-26 13:01:01 +00002592
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002593 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002594}
2595
2596/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002597Py_LOCAL(PyStringObject *)
2598replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002599 char from_c,
2600 const char *to_s, Py_ssize_t to_len,
2601 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002602{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002603 char *self_s, *result_s;
2604 char *start, *next, *end;
2605 Py_ssize_t self_len, result_len;
2606 Py_ssize_t count, product;
2607 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002608
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002609 self_s = PyString_AS_STRING(self);
2610 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002611
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002612 count = countchar(self_s, self_len, from_c, maxcount);
2613 if (count == 0) {
2614 /* no matches, return unchanged */
2615 return return_self(self);
2616 }
Christian Heimes44720832008-05-26 13:01:01 +00002617
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002618 /* use the difference between current and new, hence the "-1" */
2619 /* result_len = self_len + count * (to_len-1) */
2620 product = count * (to_len-1);
2621 if (product / (to_len-1) != count) {
2622 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2623 return NULL;
2624 }
2625 result_len = self_len + product;
2626 if (result_len < 0) {
2627 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2628 return NULL;
2629 }
Christian Heimes44720832008-05-26 13:01:01 +00002630
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002631 if ( (result = (PyStringObject *)
2632 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2633 return NULL;
2634 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002635
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002636 start = self_s;
2637 end = self_s + self_len;
2638 while (count-- > 0) {
2639 next = findchar(start, end-start, from_c);
2640 if (next == NULL)
2641 break;
Christian Heimes44720832008-05-26 13:01:01 +00002642
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002643 if (next == start) {
2644 /* replace with the 'to' */
2645 Py_MEMCPY(result_s, to_s, to_len);
2646 result_s += to_len;
2647 start += 1;
2648 } else {
2649 /* copy the unchanged old then the 'to' */
2650 Py_MEMCPY(result_s, start, next-start);
2651 result_s += (next-start);
2652 Py_MEMCPY(result_s, to_s, to_len);
2653 result_s += to_len;
2654 start = next+1;
2655 }
2656 }
2657 /* Copy the remainder of the remaining string */
2658 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002659
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002660 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002661}
2662
2663/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002664Py_LOCAL(PyStringObject *)
2665replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002666 const char *from_s, Py_ssize_t from_len,
2667 const char *to_s, Py_ssize_t to_len,
2668 Py_ssize_t maxcount) {
2669 char *self_s, *result_s;
2670 char *start, *next, *end;
2671 Py_ssize_t self_len, result_len;
2672 Py_ssize_t count, offset, product;
2673 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002674
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002675 self_s = PyString_AS_STRING(self);
2676 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002677
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002678 count = stringlib_count(self_s, self_len,
2679 from_s, from_len,
2680 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002681
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002682 if (count == 0) {
2683 /* no matches, return unchanged */
2684 return return_self(self);
2685 }
Christian Heimes44720832008-05-26 13:01:01 +00002686
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002687 /* Check for overflow */
2688 /* result_len = self_len + count * (to_len-from_len) */
2689 product = count * (to_len-from_len);
2690 if (product / (to_len-from_len) != count) {
2691 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2692 return NULL;
2693 }
2694 result_len = self_len + product;
2695 if (result_len < 0) {
2696 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2697 return NULL;
2698 }
Christian Heimes44720832008-05-26 13:01:01 +00002699
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002700 if ( (result = (PyStringObject *)
2701 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2702 return NULL;
2703 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002704
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002705 start = self_s;
2706 end = self_s + self_len;
2707 while (count-- > 0) {
2708 offset = stringlib_find(start, end-start,
2709 from_s, from_len,
2710 0);
2711 if (offset == -1)
2712 break;
2713 next = start+offset;
2714 if (next == start) {
2715 /* replace with the 'to' */
2716 Py_MEMCPY(result_s, to_s, to_len);
2717 result_s += to_len;
2718 start += from_len;
2719 } else {
2720 /* copy the unchanged old then the 'to' */
2721 Py_MEMCPY(result_s, start, next-start);
2722 result_s += (next-start);
2723 Py_MEMCPY(result_s, to_s, to_len);
2724 result_s += to_len;
2725 start = next+from_len;
2726 }
2727 }
2728 /* Copy the remainder of the remaining string */
2729 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002730
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002731 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002732}
2733
2734
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002735Py_LOCAL(PyStringObject *)
2736replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002737 const char *from_s, Py_ssize_t from_len,
2738 const char *to_s, Py_ssize_t to_len,
2739 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002740{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002741 if (maxcount < 0) {
2742 maxcount = PY_SSIZE_T_MAX;
2743 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2744 /* nothing to do; return the original string */
2745 return return_self(self);
2746 }
Christian Heimes44720832008-05-26 13:01:01 +00002747
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002748 if (maxcount == 0 ||
2749 (from_len == 0 && to_len == 0)) {
2750 /* nothing to do; return the original string */
2751 return return_self(self);
2752 }
Christian Heimes44720832008-05-26 13:01:01 +00002753
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002754 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002755
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002756 if (from_len == 0) {
2757 /* insert the 'to' string everywhere. */
2758 /* >>> "Python".replace("", ".") */
2759 /* '.P.y.t.h.o.n.' */
2760 return replace_interleave(self, to_s, to_len, maxcount);
2761 }
Christian Heimes44720832008-05-26 13:01:01 +00002762
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002763 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2764 /* point for an empty self string to generate a non-empty string */
2765 /* Special case so the remaining code always gets a non-empty string */
2766 if (PyString_GET_SIZE(self) == 0) {
2767 return return_self(self);
2768 }
Christian Heimes44720832008-05-26 13:01:01 +00002769
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002770 if (to_len == 0) {
2771 /* delete all occurances of 'from' string */
2772 if (from_len == 1) {
2773 return replace_delete_single_character(
2774 self, from_s[0], maxcount);
2775 } else {
2776 return replace_delete_substring(self, from_s, from_len, maxcount);
2777 }
2778 }
Christian Heimes44720832008-05-26 13:01:01 +00002779
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002780 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002781
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002782 if (from_len == to_len) {
2783 if (from_len == 1) {
2784 return replace_single_character_in_place(
2785 self,
2786 from_s[0],
2787 to_s[0],
2788 maxcount);
2789 } else {
2790 return replace_substring_in_place(
2791 self, from_s, from_len, to_s, to_len, maxcount);
2792 }
2793 }
Christian Heimes44720832008-05-26 13:01:01 +00002794
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002795 /* Otherwise use the more generic algorithms */
2796 if (from_len == 1) {
2797 return replace_single_character(self, from_s[0],
2798 to_s, to_len, maxcount);
2799 } else {
2800 /* len('from')>=2, len('to')>=1 */
2801 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2802 }
Christian Heimes44720832008-05-26 13:01:01 +00002803}
2804
2805PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002806"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002807\n\
2808Return a copy of string S with all occurrences of substring\n\
2809old replaced by new. If the optional argument count is\n\
2810given, only the first count occurrences are replaced.");
2811
2812static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002813string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002814{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002815 Py_ssize_t count = -1;
2816 PyObject *from, *to;
2817 const char *from_s, *to_s;
2818 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002819
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002820 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2821 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002822
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002823 if (PyString_Check(from)) {
2824 from_s = PyString_AS_STRING(from);
2825 from_len = PyString_GET_SIZE(from);
2826 }
Christian Heimes44720832008-05-26 13:01:01 +00002827#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002828 if (PyUnicode_Check(from))
2829 return PyUnicode_Replace((PyObject *)self,
2830 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002831#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002832 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2833 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002834
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002835 if (PyString_Check(to)) {
2836 to_s = PyString_AS_STRING(to);
2837 to_len = PyString_GET_SIZE(to);
2838 }
Christian Heimes44720832008-05-26 13:01:01 +00002839#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002840 else if (PyUnicode_Check(to))
2841 return PyUnicode_Replace((PyObject *)self,
2842 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002843#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002844 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2845 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002846
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002847 return (PyObject *)replace((PyStringObject *) self,
2848 from_s, from_len,
2849 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002850}
2851
2852/** End DALKE **/
2853
2854/* Matches the end (direction >= 0) or start (direction < 0) of self
2855 * against substr, using the start and end arguments. Returns
2856 * -1 on error, 0 if not found and 1 if found.
2857 */
2858Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002859_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002860 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002861{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002862 Py_ssize_t len = PyString_GET_SIZE(self);
2863 Py_ssize_t slen;
2864 const char* sub;
2865 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002866
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002867 if (PyString_Check(substr)) {
2868 sub = PyString_AS_STRING(substr);
2869 slen = PyString_GET_SIZE(substr);
2870 }
Christian Heimes44720832008-05-26 13:01:01 +00002871#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002872 else if (PyUnicode_Check(substr))
2873 return PyUnicode_Tailmatch((PyObject *)self,
2874 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002875#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002876 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2877 return -1;
2878 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002879
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002880 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002881
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002882 if (direction < 0) {
2883 /* startswith */
2884 if (start+slen > len)
2885 return 0;
2886 } else {
2887 /* endswith */
2888 if (end-start < slen || start > len)
2889 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002890
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002891 if (end-slen > start)
2892 start = end - slen;
2893 }
2894 if (end-start >= slen)
2895 return ! memcmp(str+start, sub, slen);
2896 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002897}
2898
2899
2900PyDoc_STRVAR(startswith__doc__,
2901"S.startswith(prefix[, start[, end]]) -> bool\n\
2902\n\
2903Return True if S starts with the specified prefix, False otherwise.\n\
2904With optional start, test S beginning at that position.\n\
2905With optional end, stop comparing S at that position.\n\
2906prefix can also be a tuple of strings to try.");
2907
2908static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002909string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002910{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002911 Py_ssize_t start = 0;
2912 Py_ssize_t end = PY_SSIZE_T_MAX;
2913 PyObject *subobj;
2914 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002915
Jesus Cea44e81682011-04-20 16:39:15 +02002916 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002917 return NULL;
2918 if (PyTuple_Check(subobj)) {
2919 Py_ssize_t i;
2920 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2921 result = _string_tailmatch(self,
2922 PyTuple_GET_ITEM(subobj, i),
2923 start, end, -1);
2924 if (result == -1)
2925 return NULL;
2926 else if (result) {
2927 Py_RETURN_TRUE;
2928 }
2929 }
2930 Py_RETURN_FALSE;
2931 }
2932 result = _string_tailmatch(self, subobj, start, end, -1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002933 if (result == -1) {
2934 if (PyErr_ExceptionMatches(PyExc_TypeError))
2935 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2936 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002937 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002938 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002939 else
2940 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002941}
2942
2943
2944PyDoc_STRVAR(endswith__doc__,
2945"S.endswith(suffix[, start[, end]]) -> bool\n\
2946\n\
2947Return True if S ends with the specified suffix, False otherwise.\n\
2948With optional start, test S beginning at that position.\n\
2949With optional end, stop comparing S at that position.\n\
2950suffix can also be a tuple of strings to try.");
2951
2952static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002953string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002954{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002955 Py_ssize_t start = 0;
2956 Py_ssize_t end = PY_SSIZE_T_MAX;
2957 PyObject *subobj;
2958 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002959
Jesus Cea44e81682011-04-20 16:39:15 +02002960 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002961 return NULL;
2962 if (PyTuple_Check(subobj)) {
2963 Py_ssize_t i;
2964 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2965 result = _string_tailmatch(self,
2966 PyTuple_GET_ITEM(subobj, i),
2967 start, end, +1);
2968 if (result == -1)
2969 return NULL;
2970 else if (result) {
2971 Py_RETURN_TRUE;
2972 }
2973 }
2974 Py_RETURN_FALSE;
2975 }
2976 result = _string_tailmatch(self, subobj, start, end, +1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002977 if (result == -1) {
2978 if (PyErr_ExceptionMatches(PyExc_TypeError))
2979 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2980 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002981 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002982 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002983 else
2984 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002985}
2986
2987
2988PyDoc_STRVAR(encode__doc__,
2989"S.encode([encoding[,errors]]) -> object\n\
2990\n\
2991Encodes S using the codec registered for encoding. encoding defaults\n\
2992to the default encoding. errors may be given to set a different error\n\
2993handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2994a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2995'xmlcharrefreplace' as well as any other name registered with\n\
2996codecs.register_error that is able to handle UnicodeEncodeErrors.");
2997
2998static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00002999string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003000{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003001 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003002 char *encoding = NULL;
3003 char *errors = NULL;
3004 PyObject *v;
3005
Benjamin Peterson332d7212009-09-18 21:14:55 +00003006 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003007 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003008 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003009 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003010 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003011 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003012 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003013 PyErr_Format(PyExc_TypeError,
3014 "encoder did not return a string/unicode object "
3015 "(type=%.400s)",
3016 Py_TYPE(v)->tp_name);
3017 Py_DECREF(v);
3018 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003019 }
3020 return v;
3021
3022 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003023 return NULL;
3024}
3025
Christian Heimes44720832008-05-26 13:01:01 +00003026
3027PyDoc_STRVAR(decode__doc__,
3028"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003029\n\
Christian Heimes44720832008-05-26 13:01:01 +00003030Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003031to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003032handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3033a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003034as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003035able to handle UnicodeDecodeErrors.");
3036
3037static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003038string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003039{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003040 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003041 char *encoding = NULL;
3042 char *errors = NULL;
3043 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003044
Benjamin Peterson332d7212009-09-18 21:14:55 +00003045 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003046 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003047 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003048 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003049 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003050 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003051 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003052 PyErr_Format(PyExc_TypeError,
3053 "decoder did not return a string/unicode object "
3054 "(type=%.400s)",
3055 Py_TYPE(v)->tp_name);
3056 Py_DECREF(v);
3057 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003058 }
3059 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003060
Christian Heimes44720832008-05-26 13:01:01 +00003061 onError:
3062 return NULL;
3063}
3064
3065
3066PyDoc_STRVAR(expandtabs__doc__,
3067"S.expandtabs([tabsize]) -> string\n\
3068\n\
3069Return a copy of S where all tab characters are expanded using spaces.\n\
3070If tabsize is not given, a tab size of 8 characters is assumed.");
3071
3072static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003073string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003074{
3075 const char *e, *p, *qe;
3076 char *q;
3077 Py_ssize_t i, j, incr;
3078 PyObject *u;
3079 int tabsize = 8;
3080
3081 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003082 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003083
3084 /* First pass: determine size of output string */
3085 i = 0; /* chars up to and including most recent \n or \r */
3086 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003087 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3088 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003089 if (*p == '\t') {
3090 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003091 incr = tabsize - (j % tabsize);
3092 if (j > PY_SSIZE_T_MAX - incr)
3093 goto overflow1;
3094 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003095 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003096 }
3097 else {
3098 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003099 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003100 j++;
3101 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003102 if (i > PY_SSIZE_T_MAX - j)
3103 goto overflow1;
3104 i += j;
3105 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003106 }
3107 }
Christian Heimes44720832008-05-26 13:01:01 +00003108
3109 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003110 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003111
3112 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003113 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003114 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003115 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003116
3117 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003118 q = PyString_AS_STRING(u); /* next output char */
3119 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003120
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003121 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003122 if (*p == '\t') {
3123 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003124 i = tabsize - (j % tabsize);
3125 j += i;
3126 while (i--) {
3127 if (q >= qe)
3128 goto overflow2;
3129 *q++ = ' ';
3130 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003131 }
3132 }
3133 else {
3134 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003135 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003136 *q++ = *p;
3137 j++;
3138 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003139 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003140 }
Christian Heimes44720832008-05-26 13:01:01 +00003141
3142 return u;
3143
3144 overflow2:
3145 Py_DECREF(u);
3146 overflow1:
3147 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3148 return NULL;
3149}
3150
3151Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003152pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003153{
3154 PyObject *u;
3155
3156 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003157 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003158 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003159 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003160
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003161 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003162 Py_INCREF(self);
3163 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003164 }
3165
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003166 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003167 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003168 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003169 if (left)
3170 memset(PyString_AS_STRING(u), fill, left);
3171 Py_MEMCPY(PyString_AS_STRING(u) + left,
3172 PyString_AS_STRING(self),
3173 PyString_GET_SIZE(self));
3174 if (right)
3175 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3176 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003177 }
3178
3179 return u;
3180}
3181
3182PyDoc_STRVAR(ljust__doc__,
3183"S.ljust(width[, fillchar]) -> string\n"
3184"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003185"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003186"done using the specified fill character (default is a space).");
3187
3188static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003189string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003190{
3191 Py_ssize_t width;
3192 char fillchar = ' ';
3193
3194 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003195 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003196
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003197 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003198 Py_INCREF(self);
3199 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003200 }
3201
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003202 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003203}
3204
3205
3206PyDoc_STRVAR(rjust__doc__,
3207"S.rjust(width[, fillchar]) -> string\n"
3208"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003209"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003210"done using the specified fill character (default is a space)");
3211
3212static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003213string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003214{
3215 Py_ssize_t width;
3216 char fillchar = ' ';
3217
3218 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003219 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003220
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003221 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003222 Py_INCREF(self);
3223 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003224 }
3225
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003226 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003227}
3228
3229
3230PyDoc_STRVAR(center__doc__,
3231"S.center(width[, fillchar]) -> string\n"
3232"\n"
3233"Return S centered in a string of length width. Padding is\n"
3234"done using the specified fill character (default is a space)");
3235
3236static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003237string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003238{
3239 Py_ssize_t marg, left;
3240 Py_ssize_t width;
3241 char fillchar = ' ';
3242
3243 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003244 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003245
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003246 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003247 Py_INCREF(self);
3248 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003249 }
3250
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003251 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003252 left = marg / 2 + (marg & width & 1);
3253
3254 return pad(self, left, marg - left, fillchar);
3255}
3256
3257PyDoc_STRVAR(zfill__doc__,
3258"S.zfill(width) -> string\n"
3259"\n"
3260"Pad a numeric string S with zeros on the left, to fill a field\n"
3261"of the specified width. The string S is never truncated.");
3262
3263static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003264string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003265{
3266 Py_ssize_t fill;
3267 PyObject *s;
3268 char *p;
3269 Py_ssize_t width;
3270
3271 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003272 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003273
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003274 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003275 if (PyString_CheckExact(self)) {
3276 Py_INCREF(self);
3277 return (PyObject*) self;
3278 }
3279 else
3280 return PyString_FromStringAndSize(
3281 PyString_AS_STRING(self),
3282 PyString_GET_SIZE(self)
3283 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003284 }
3285
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003286 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003287
Christian Heimes44720832008-05-26 13:01:01 +00003288 s = pad(self, fill, 0, '0');
3289
3290 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003291 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003292
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003293 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003294 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003295 /* move sign to beginning of string */
3296 p[0] = p[fill];
3297 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003298 }
3299
3300 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003301}
3302
Christian Heimes44720832008-05-26 13:01:01 +00003303PyDoc_STRVAR(isspace__doc__,
3304"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003305\n\
Christian Heimes44720832008-05-26 13:01:01 +00003306Return True if all characters in S are whitespace\n\
3307and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003308
Christian Heimes44720832008-05-26 13:01:01 +00003309static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003310string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003311{
Christian Heimes44720832008-05-26 13:01:01 +00003312 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003313 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003314 register const unsigned char *e;
3315
3316 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003317 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003318 isspace(*p))
3319 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003320
3321 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003322 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003323 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003324
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003325 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003326 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003327 if (!isspace(*p))
3328 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003329 }
Christian Heimes44720832008-05-26 13:01:01 +00003330 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003331}
3332
Christian Heimes44720832008-05-26 13:01:01 +00003333
3334PyDoc_STRVAR(isalpha__doc__,
3335"S.isalpha() -> bool\n\
3336\n\
3337Return True if all characters in S are alphabetic\n\
3338and there is at least one character in S, False otherwise.");
3339
3340static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003341string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003342{
Christian Heimes44720832008-05-26 13:01:01 +00003343 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003344 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003345 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003346
Christian Heimes44720832008-05-26 13:01:01 +00003347 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003348 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003349 isalpha(*p))
3350 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003351
3352 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003353 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003354 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003355
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003356 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003357 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003358 if (!isalpha(*p))
3359 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003360 }
Christian Heimes44720832008-05-26 13:01:01 +00003361 return PyBool_FromLong(1);
3362}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003363
Christian Heimes44720832008-05-26 13:01:01 +00003364
3365PyDoc_STRVAR(isalnum__doc__,
3366"S.isalnum() -> bool\n\
3367\n\
3368Return True if all characters in S are alphanumeric\n\
3369and there is at least one character in S, False otherwise.");
3370
3371static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003372string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003373{
3374 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003375 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003376 register const unsigned char *e;
3377
3378 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003379 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003380 isalnum(*p))
3381 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003382
3383 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003384 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003385 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003386
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003387 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003388 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003389 if (!isalnum(*p))
3390 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003391 }
3392 return PyBool_FromLong(1);
3393}
3394
3395
3396PyDoc_STRVAR(isdigit__doc__,
3397"S.isdigit() -> bool\n\
3398\n\
3399Return True if all characters in S are digits\n\
3400and there is at least one character in S, False otherwise.");
3401
3402static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003403string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003404{
3405 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003406 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003407 register const unsigned char *e;
3408
3409 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003410 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003411 isdigit(*p))
3412 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003413
3414 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003415 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003416 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003417
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003418 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003419 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003420 if (!isdigit(*p))
3421 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003422 }
3423 return PyBool_FromLong(1);
3424}
3425
3426
3427PyDoc_STRVAR(islower__doc__,
3428"S.islower() -> bool\n\
3429\n\
3430Return True if all cased characters in S are lowercase and there is\n\
3431at least one cased character in S, False otherwise.");
3432
3433static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003434string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003435{
3436 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003437 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003438 register const unsigned char *e;
3439 int cased;
3440
3441 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003442 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003443 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003444
3445 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003446 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003447 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003448
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003449 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003450 cased = 0;
3451 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003452 if (isupper(*p))
3453 return PyBool_FromLong(0);
3454 else if (!cased && islower(*p))
3455 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003456 }
3457 return PyBool_FromLong(cased);
3458}
3459
3460
3461PyDoc_STRVAR(isupper__doc__,
3462"S.isupper() -> bool\n\
3463\n\
3464Return True if all cased characters in S are uppercase and there is\n\
3465at least one cased character in S, False otherwise.");
3466
3467static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003468string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003469{
3470 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003471 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003472 register const unsigned char *e;
3473 int cased;
3474
3475 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003476 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003477 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003478
3479 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003480 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003481 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003482
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003483 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003484 cased = 0;
3485 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003486 if (islower(*p))
3487 return PyBool_FromLong(0);
3488 else if (!cased && isupper(*p))
3489 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003490 }
3491 return PyBool_FromLong(cased);
3492}
3493
3494
3495PyDoc_STRVAR(istitle__doc__,
3496"S.istitle() -> bool\n\
3497\n\
3498Return True if S is a titlecased string and there is at least one\n\
3499character in S, i.e. uppercase characters may only follow uncased\n\
3500characters and lowercase characters only cased ones. Return False\n\
3501otherwise.");
3502
3503static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003504string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003505{
3506 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003507 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003508 register const unsigned char *e;
3509 int cased, previous_is_cased;
3510
3511 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003512 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003513 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003514
3515 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003516 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003517 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003518
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003519 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003520 cased = 0;
3521 previous_is_cased = 0;
3522 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003523 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003524
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003525 if (isupper(ch)) {
3526 if (previous_is_cased)
3527 return PyBool_FromLong(0);
3528 previous_is_cased = 1;
3529 cased = 1;
3530 }
3531 else if (islower(ch)) {
3532 if (!previous_is_cased)
3533 return PyBool_FromLong(0);
3534 previous_is_cased = 1;
3535 cased = 1;
3536 }
3537 else
3538 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003539 }
3540 return PyBool_FromLong(cased);
3541}
3542
3543
3544PyDoc_STRVAR(splitlines__doc__,
3545"S.splitlines([keepends]) -> list of strings\n\
3546\n\
3547Return a list of the lines in S, breaking at line boundaries.\n\
3548Line breaks are not included in the resulting list unless keepends\n\
3549is given and true.");
3550
3551static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003552string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003553{
Christian Heimes44720832008-05-26 13:01:01 +00003554 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003555
3556 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003557 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003558
Antoine Pitrou64672132010-01-13 07:55:48 +00003559 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003560 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3561 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003562 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003563}
3564
Robert Schuppenies51df0642008-06-01 16:16:17 +00003565PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003566"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003567
3568static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003569string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003570{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003571 Py_ssize_t res;
3572 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3573 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003574}
3575
Christian Heimes1a6387e2008-03-26 12:49:49 +00003576static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003577string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003579 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003580}
3581
Christian Heimes1a6387e2008-03-26 12:49:49 +00003582
Christian Heimes44720832008-05-26 13:01:01 +00003583#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003584
Christian Heimes44720832008-05-26 13:01:01 +00003585PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003586"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003587\n\
Eric Smith6c840852010-11-06 19:43:44 +00003588Return a formatted version of S, using substitutions from args and kwargs.\n\
3589The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003590
Eric Smithdc13b792008-05-30 18:10:04 +00003591static PyObject *
3592string__format__(PyObject* self, PyObject* args)
3593{
3594 PyObject *format_spec;
3595 PyObject *result = NULL;
3596 PyObject *tmp = NULL;
3597
3598 /* If 2.x, convert format_spec to the same type as value */
3599 /* This is to allow things like u''.format('') */
3600 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003601 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003602 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003603 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3604 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3605 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003606 }
3607 tmp = PyObject_Str(format_spec);
3608 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003609 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003610 format_spec = tmp;
3611
3612 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003613 PyString_AS_STRING(format_spec),
3614 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003615done:
3616 Py_XDECREF(tmp);
3617 return result;
3618}
3619
Christian Heimes44720832008-05-26 13:01:01 +00003620PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003621"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003622\n\
Eric Smith6c840852010-11-06 19:43:44 +00003623Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003624
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003625
Christian Heimes1a6387e2008-03-26 12:49:49 +00003626static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003627string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003628 /* Counterparts of the obsolete stropmodule functions; except
3629 string.maketrans(). */
3630 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3631 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3632 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3633 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3634 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3635 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3636 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3637 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3638 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3639 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3640 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3641 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3642 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3643 capitalize__doc__},
3644 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3645 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3646 endswith__doc__},
3647 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3648 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3649 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3650 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3651 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3652 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3653 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3654 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3655 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3656 rpartition__doc__},
3657 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3658 startswith__doc__},
3659 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3660 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3661 swapcase__doc__},
3662 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3663 translate__doc__},
3664 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3665 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3666 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3667 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3668 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3669 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3670 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3671 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3672 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3673 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3674 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3675 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3676 expandtabs__doc__},
3677 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3678 splitlines__doc__},
3679 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3680 sizeof__doc__},
3681 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3682 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003683};
3684
3685static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003686str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003687
Christian Heimes44720832008-05-26 13:01:01 +00003688static PyObject *
3689string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3690{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003691 PyObject *x = NULL;
3692 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003693
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003694 if (type != &PyString_Type)
3695 return str_subtype_new(type, args, kwds);
3696 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3697 return NULL;
3698 if (x == NULL)
3699 return PyString_FromString("");
3700 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003701}
3702
3703static PyObject *
3704str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3705{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003706 PyObject *tmp, *pnew;
3707 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003708
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003709 assert(PyType_IsSubtype(type, &PyString_Type));
3710 tmp = string_new(&PyString_Type, args, kwds);
3711 if (tmp == NULL)
3712 return NULL;
3713 assert(PyString_CheckExact(tmp));
3714 n = PyString_GET_SIZE(tmp);
3715 pnew = type->tp_alloc(type, n);
3716 if (pnew != NULL) {
3717 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3718 ((PyStringObject *)pnew)->ob_shash =
3719 ((PyStringObject *)tmp)->ob_shash;
3720 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3721 }
3722 Py_DECREF(tmp);
3723 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003724}
3725
3726static PyObject *
3727basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3728{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003729 PyErr_SetString(PyExc_TypeError,
3730 "The basestring type cannot be instantiated");
3731 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003732}
3733
3734static PyObject *
3735string_mod(PyObject *v, PyObject *w)
3736{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003737 if (!PyString_Check(v)) {
3738 Py_INCREF(Py_NotImplemented);
3739 return Py_NotImplemented;
3740 }
3741 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003742}
3743
3744PyDoc_STRVAR(basestring_doc,
3745"Type basestring cannot be instantiated; it is the base for str and unicode.");
3746
3747static PyNumberMethods string_as_number = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003748 0, /*nb_add*/
3749 0, /*nb_subtract*/
3750 0, /*nb_multiply*/
3751 0, /*nb_divide*/
3752 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00003753};
3754
3755
3756PyTypeObject PyBaseString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003757 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3758 "basestring",
3759 0,
3760 0,
3761 0, /* tp_dealloc */
3762 0, /* tp_print */
3763 0, /* tp_getattr */
3764 0, /* tp_setattr */
3765 0, /* tp_compare */
3766 0, /* tp_repr */
3767 0, /* tp_as_number */
3768 0, /* tp_as_sequence */
3769 0, /* tp_as_mapping */
3770 0, /* tp_hash */
3771 0, /* tp_call */
3772 0, /* tp_str */
3773 0, /* tp_getattro */
3774 0, /* tp_setattro */
3775 0, /* tp_as_buffer */
3776 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3777 basestring_doc, /* tp_doc */
3778 0, /* tp_traverse */
3779 0, /* tp_clear */
3780 0, /* tp_richcompare */
3781 0, /* tp_weaklistoffset */
3782 0, /* tp_iter */
3783 0, /* tp_iternext */
3784 0, /* tp_methods */
3785 0, /* tp_members */
3786 0, /* tp_getset */
3787 &PyBaseObject_Type, /* tp_base */
3788 0, /* tp_dict */
3789 0, /* tp_descr_get */
3790 0, /* tp_descr_set */
3791 0, /* tp_dictoffset */
3792 0, /* tp_init */
3793 0, /* tp_alloc */
3794 basestring_new, /* tp_new */
3795 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003796};
3797
3798PyDoc_STRVAR(string_doc,
3799"str(object) -> string\n\
3800\n\
3801Return a nice string representation of the object.\n\
3802If the argument is a string, the return value is the same object.");
3803
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003804PyTypeObject PyString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003805 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3806 "str",
3807 PyStringObject_SIZE,
3808 sizeof(char),
3809 string_dealloc, /* tp_dealloc */
3810 (printfunc)string_print, /* tp_print */
3811 0, /* tp_getattr */
3812 0, /* tp_setattr */
3813 0, /* tp_compare */
3814 string_repr, /* tp_repr */
3815 &string_as_number, /* tp_as_number */
3816 &string_as_sequence, /* tp_as_sequence */
3817 &string_as_mapping, /* tp_as_mapping */
3818 (hashfunc)string_hash, /* tp_hash */
3819 0, /* tp_call */
3820 string_str, /* tp_str */
3821 PyObject_GenericGetAttr, /* tp_getattro */
3822 0, /* tp_setattro */
3823 &string_as_buffer, /* tp_as_buffer */
3824 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3825 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3826 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3827 string_doc, /* tp_doc */
3828 0, /* tp_traverse */
3829 0, /* tp_clear */
3830 (richcmpfunc)string_richcompare, /* tp_richcompare */
3831 0, /* tp_weaklistoffset */
3832 0, /* tp_iter */
3833 0, /* tp_iternext */
3834 string_methods, /* tp_methods */
3835 0, /* tp_members */
3836 0, /* tp_getset */
3837 &PyBaseString_Type, /* tp_base */
3838 0, /* tp_dict */
3839 0, /* tp_descr_get */
3840 0, /* tp_descr_set */
3841 0, /* tp_dictoffset */
3842 0, /* tp_init */
3843 0, /* tp_alloc */
3844 string_new, /* tp_new */
3845 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003846};
3847
3848void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003849PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003850{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003851 register PyObject *v;
3852 if (*pv == NULL)
3853 return;
3854 if (w == NULL || !PyString_Check(*pv)) {
3855 Py_DECREF(*pv);
3856 *pv = NULL;
3857 return;
3858 }
3859 v = string_concat((PyStringObject *) *pv, w);
3860 Py_DECREF(*pv);
3861 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003862}
3863
3864void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003865PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003866{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003867 PyString_Concat(pv, w);
3868 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003869}
3870
3871
3872/* The following function breaks the notion that strings are immutable:
3873 it changes the size of a string. We get away with this only if there
3874 is only one module referencing the object. You can also think of it
3875 as creating a new string object and destroying the old one, only
3876 more efficiently. In any case, don't use this if the string may
3877 already be known to some other part of the code...
3878 Note that if there's not enough memory to resize the string, the original
3879 string object at *pv is deallocated, *pv is set to NULL, an "out of
3880 memory" exception is set, and -1 is returned. Else (on success) 0 is
3881 returned, and the value in *pv may or may not be the same as on input.
3882 As always, an extra byte is allocated for a trailing \0 byte (newsize
3883 does *not* include that), and a trailing \0 byte is stored.
3884*/
3885
3886int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003887_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003888{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003889 register PyObject *v;
3890 register PyStringObject *sv;
3891 v = *pv;
3892 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3893 PyString_CHECK_INTERNED(v)) {
3894 *pv = 0;
3895 Py_DECREF(v);
3896 PyErr_BadInternalCall();
3897 return -1;
3898 }
3899 /* XXX UNREF/NEWREF interface should be more symmetrical */
3900 _Py_DEC_REFTOTAL;
3901 _Py_ForgetReference(v);
3902 *pv = (PyObject *)
3903 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3904 if (*pv == NULL) {
3905 PyObject_Del(v);
3906 PyErr_NoMemory();
3907 return -1;
3908 }
3909 _Py_NewReference(*pv);
3910 sv = (PyStringObject *) *pv;
3911 Py_SIZE(sv) = newsize;
3912 sv->ob_sval[newsize] = '\0';
3913 sv->ob_shash = -1; /* invalidate cached hash value */
3914 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003915}
3916
3917/* Helpers for formatstring */
3918
3919Py_LOCAL_INLINE(PyObject *)
3920getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3921{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003922 Py_ssize_t argidx = *p_argidx;
3923 if (argidx < arglen) {
3924 (*p_argidx)++;
3925 if (arglen < 0)
3926 return args;
3927 else
3928 return PyTuple_GetItem(args, argidx);
3929 }
3930 PyErr_SetString(PyExc_TypeError,
3931 "not enough arguments for format string");
3932 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003933}
3934
/* Bit flags recording which flag characters appeared in a format
 * specifier; they are OR-ed together into a single int.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
Christian Heimes44720832008-05-26 13:01:01 +00003947
Mark Dickinson18cfada2009-11-23 18:46:41 +00003948/* Returns a new reference to a PyString object, or NULL on failure. */
3949
3950static PyObject *
3951formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003952{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003953 char *p;
3954 PyObject *result;
3955 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003956
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003957 x = PyFloat_AsDouble(v);
3958 if (x == -1.0 && PyErr_Occurred()) {
3959 PyErr_Format(PyExc_TypeError, "float argument required, "
3960 "not %.200s", Py_TYPE(v)->tp_name);
3961 return NULL;
3962 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003963
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003964 if (prec < 0)
3965 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003966
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003967 p = PyOS_double_to_string(x, type, prec,
3968 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003969
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003970 if (p == NULL)
3971 return NULL;
3972 result = PyString_FromStringAndSize(p, strlen(p));
3973 PyMem_Free(p);
3974 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003975}
3976
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003977/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003978 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3979 * Python's regular ints.
3980 * Return value: a new PyString*, or NULL if error.
3981 * . *pbuf is set to point into it,
3982 * *plen set to the # of chars following that.
3983 * Caller must decref it when done using pbuf.
3984 * The string starting at *pbuf is of the form
3985 * "-"? ("0x" | "0X")? digit+
3986 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3987 * set in flags. The case of hex digits will be correct,
3988 * There will be at least prec digits, zero-filled on the left if
3989 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003990 * val object to be converted
3991 * flags bitmask of format flags; only F_ALT is looked at
3992 * prec minimum number of digits; 0-fill on left if needed
3993 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003994 *
3995 * CAUTION: o, x and X conversions on regular ints can never
3996 * produce a '-' sign, but can for Python's unbounded ints.
3997 */
3998PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003999_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004000 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00004001{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004002 PyObject *result = NULL;
4003 char *buf;
4004 Py_ssize_t i;
4005 int sign; /* 1 if '-', else 0 */
4006 int len; /* number of characters */
4007 Py_ssize_t llen;
4008 int numdigits; /* len == numnondigits + numdigits */
4009 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004010
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004011 switch (type) {
4012 case 'd':
4013 case 'u':
4014 result = Py_TYPE(val)->tp_str(val);
4015 break;
4016 case 'o':
4017 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4018 break;
4019 case 'x':
4020 case 'X':
4021 numnondigits = 2;
4022 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4023 break;
4024 default:
4025 assert(!"'type' not in [duoxX]");
4026 }
4027 if (!result)
4028 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004029
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004030 buf = PyString_AsString(result);
4031 if (!buf) {
4032 Py_DECREF(result);
4033 return NULL;
4034 }
Christian Heimes44720832008-05-26 13:01:01 +00004035
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004036 /* To modify the string in-place, there can only be one reference. */
4037 if (Py_REFCNT(result) != 1) {
4038 PyErr_BadInternalCall();
4039 return NULL;
4040 }
4041 llen = PyString_Size(result);
4042 if (llen > INT_MAX) {
4043 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4044 return NULL;
4045 }
4046 len = (int)llen;
4047 if (buf[len-1] == 'L') {
4048 --len;
4049 buf[len] = '\0';
4050 }
4051 sign = buf[0] == '-';
4052 numnondigits += sign;
4053 numdigits = len - numnondigits;
4054 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004055
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004056 /* Get rid of base marker unless F_ALT */
4057 if ((flags & F_ALT) == 0) {
4058 /* Need to skip 0x, 0X or 0. */
4059 int skipped = 0;
4060 switch (type) {
4061 case 'o':
4062 assert(buf[sign] == '0');
4063 /* If 0 is only digit, leave it alone. */
4064 if (numdigits > 1) {
4065 skipped = 1;
4066 --numdigits;
4067 }
4068 break;
4069 case 'x':
4070 case 'X':
4071 assert(buf[sign] == '0');
4072 assert(buf[sign + 1] == 'x');
4073 skipped = 2;
4074 numnondigits -= 2;
4075 break;
4076 }
4077 if (skipped) {
4078 buf += skipped;
4079 len -= skipped;
4080 if (sign)
4081 buf[0] = '-';
4082 }
4083 assert(len == numnondigits + numdigits);
4084 assert(numdigits > 0);
4085 }
Christian Heimes44720832008-05-26 13:01:01 +00004086
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004087 /* Fill with leading zeroes to meet minimum width. */
4088 if (prec > numdigits) {
4089 PyObject *r1 = PyString_FromStringAndSize(NULL,
4090 numnondigits + prec);
4091 char *b1;
4092 if (!r1) {
4093 Py_DECREF(result);
4094 return NULL;
4095 }
4096 b1 = PyString_AS_STRING(r1);
4097 for (i = 0; i < numnondigits; ++i)
4098 *b1++ = *buf++;
4099 for (i = 0; i < prec - numdigits; i++)
4100 *b1++ = '0';
4101 for (i = 0; i < numdigits; i++)
4102 *b1++ = *buf++;
4103 *b1 = '\0';
4104 Py_DECREF(result);
4105 result = r1;
4106 buf = PyString_AS_STRING(result);
4107 len = numnondigits + prec;
4108 }
Christian Heimes44720832008-05-26 13:01:01 +00004109
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004110 /* Fix up case for hex conversions. */
4111 if (type == 'X') {
4112 /* Need to convert all lower case letters to upper case.
4113 and need to convert 0x to 0X (and -0x to -0X). */
4114 for (i = 0; i < len; i++)
4115 if (buf[i] >= 'a' && buf[i] <= 'x')
4116 buf[i] -= 'a'-'A';
4117 }
4118 *pbuf = buf;
4119 *plen = len;
4120 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004121}
4122
4123Py_LOCAL_INLINE(int)
4124formatint(char *buf, size_t buflen, int flags,
4125 int prec, int type, PyObject *v)
4126{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004127 /* fmt = '%#.' + `prec` + 'l' + `type`
4128 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4129 + 1 + 1 = 24 */
4130 char fmt[64]; /* plenty big enough! */
4131 char *sign;
4132 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004133
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004134 x = PyInt_AsLong(v);
4135 if (x == -1 && PyErr_Occurred()) {
4136 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4137 Py_TYPE(v)->tp_name);
4138 return -1;
4139 }
4140 if (x < 0 && type == 'u') {
4141 type = 'd';
4142 }
4143 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4144 sign = "-";
4145 else
4146 sign = "";
4147 if (prec < 0)
4148 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004149
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004150 if ((flags & F_ALT) &&
4151 (type == 'x' || type == 'X')) {
4152 /* When converting under %#x or %#X, there are a number
4153 * of issues that cause pain:
4154 * - when 0 is being converted, the C standard leaves off
4155 * the '0x' or '0X', which is inconsistent with other
4156 * %#x/%#X conversions and inconsistent with Python's
4157 * hex() function
4158 * - there are platforms that violate the standard and
4159 * convert 0 with the '0x' or '0X'
4160 * (Metrowerks, Compaq Tru64)
4161 * - there are platforms that give '0x' when converting
4162 * under %#X, but convert 0 in accordance with the
4163 * standard (OS/2 EMX)
4164 *
4165 * We can achieve the desired consistency by inserting our
4166 * own '0x' or '0X' prefix, and substituting %x/%X in place
4167 * of %#x/%#X.
4168 *
4169 * Note that this is the same approach as used in
4170 * formatint() in unicodeobject.c
4171 */
4172 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4173 sign, type, prec, type);
4174 }
4175 else {
4176 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4177 sign, (flags&F_ALT) ? "#" : "",
4178 prec, type);
4179 }
Christian Heimes44720832008-05-26 13:01:01 +00004180
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004181 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4182 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4183 */
4184 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4185 PyErr_SetString(PyExc_OverflowError,
4186 "formatted integer is too long (precision too large?)");
4187 return -1;
4188 }
4189 if (sign[0])
4190 PyOS_snprintf(buf, buflen, fmt, -x);
4191 else
4192 PyOS_snprintf(buf, buflen, fmt, x);
4193 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004194}
4195
4196Py_LOCAL_INLINE(int)
4197formatchar(char *buf, size_t buflen, PyObject *v)
4198{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004199 /* presume that the buffer is at least 2 characters long */
4200 if (PyString_Check(v)) {
4201 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4202 return -1;
4203 }
4204 else {
4205 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4206 return -1;
4207 }
4208 buf[1] = '\0';
4209 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004210}
4211
4212/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4213
Mark Dickinson18cfada2009-11-23 18:46:41 +00004214 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004215 chars are formatted. XXX This is a magic number. Each formatting
4216 routine does bounds checking to ensure no overflow, but a better
4217 solution may be to malloc a buffer of appropriate size for each
4218 format. For now, the current solution is sufficient.
4219*/
4220#define FORMATBUFLEN (size_t)120
4221
4222PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004223PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004224{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004225 char *fmt, *res;
4226 Py_ssize_t arglen, argidx;
4227 Py_ssize_t reslen, rescnt, fmtcnt;
4228 int args_owned = 0;
4229 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004230#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004231 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004232#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004233 PyObject *dict = NULL;
4234 if (format == NULL || !PyString_Check(format) || args == NULL) {
4235 PyErr_BadInternalCall();
4236 return NULL;
4237 }
4238 orig_args = args;
4239 fmt = PyString_AS_STRING(format);
4240 fmtcnt = PyString_GET_SIZE(format);
4241 reslen = rescnt = fmtcnt + 100;
4242 result = PyString_FromStringAndSize((char *)NULL, reslen);
4243 if (result == NULL)
4244 return NULL;
4245 res = PyString_AsString(result);
4246 if (PyTuple_Check(args)) {
4247 arglen = PyTuple_GET_SIZE(args);
4248 argidx = 0;
4249 }
4250 else {
4251 arglen = -1;
4252 argidx = -2;
4253 }
4254 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4255 !PyObject_TypeCheck(args, &PyBaseString_Type))
4256 dict = args;
4257 while (--fmtcnt >= 0) {
4258 if (*fmt != '%') {
4259 if (--rescnt < 0) {
4260 rescnt = fmtcnt + 100;
4261 reslen += rescnt;
4262 if (_PyString_Resize(&result, reslen))
4263 return NULL;
4264 res = PyString_AS_STRING(result)
4265 + reslen - rescnt;
4266 --rescnt;
4267 }
4268 *res++ = *fmt++;
4269 }
4270 else {
4271 /* Got a format specifier */
4272 int flags = 0;
4273 Py_ssize_t width = -1;
4274 int prec = -1;
4275 int c = '\0';
4276 int fill;
4277 int isnumok;
4278 PyObject *v = NULL;
4279 PyObject *temp = NULL;
4280 char *pbuf;
4281 int sign;
4282 Py_ssize_t len;
4283 char formatbuf[FORMATBUFLEN];
4284 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004285#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004286 char *fmt_start = fmt;
4287 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004288#endif
4289
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004290 fmt++;
4291 if (*fmt == '(') {
4292 char *keystart;
4293 Py_ssize_t keylen;
4294 PyObject *key;
4295 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004296
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004297 if (dict == NULL) {
4298 PyErr_SetString(PyExc_TypeError,
4299 "format requires a mapping");
4300 goto error;
4301 }
4302 ++fmt;
4303 --fmtcnt;
4304 keystart = fmt;
4305 /* Skip over balanced parentheses */
4306 while (pcount > 0 && --fmtcnt >= 0) {
4307 if (*fmt == ')')
4308 --pcount;
4309 else if (*fmt == '(')
4310 ++pcount;
4311 fmt++;
4312 }
4313 keylen = fmt - keystart - 1;
4314 if (fmtcnt < 0 || pcount > 0) {
4315 PyErr_SetString(PyExc_ValueError,
4316 "incomplete format key");
4317 goto error;
4318 }
4319 key = PyString_FromStringAndSize(keystart,
4320 keylen);
4321 if (key == NULL)
4322 goto error;
4323 if (args_owned) {
4324 Py_DECREF(args);
4325 args_owned = 0;
4326 }
4327 args = PyObject_GetItem(dict, key);
4328 Py_DECREF(key);
4329 if (args == NULL) {
4330 goto error;
4331 }
4332 args_owned = 1;
4333 arglen = -1;
4334 argidx = -2;
4335 }
4336 while (--fmtcnt >= 0) {
4337 switch (c = *fmt++) {
4338 case '-': flags |= F_LJUST; continue;
4339 case '+': flags |= F_SIGN; continue;
4340 case ' ': flags |= F_BLANK; continue;
4341 case '#': flags |= F_ALT; continue;
4342 case '0': flags |= F_ZERO; continue;
4343 }
4344 break;
4345 }
4346 if (c == '*') {
4347 v = getnextarg(args, arglen, &argidx);
4348 if (v == NULL)
4349 goto error;
4350 if (!PyInt_Check(v)) {
4351 PyErr_SetString(PyExc_TypeError,
4352 "* wants int");
4353 goto error;
4354 }
4355 width = PyInt_AsLong(v);
4356 if (width < 0) {
4357 flags |= F_LJUST;
4358 width = -width;
4359 }
4360 if (--fmtcnt >= 0)
4361 c = *fmt++;
4362 }
4363 else if (c >= 0 && isdigit(c)) {
4364 width = c - '0';
4365 while (--fmtcnt >= 0) {
4366 c = Py_CHARMASK(*fmt++);
4367 if (!isdigit(c))
4368 break;
4369 if ((width*10) / 10 != width) {
4370 PyErr_SetString(
4371 PyExc_ValueError,
4372 "width too big");
4373 goto error;
4374 }
4375 width = width*10 + (c - '0');
4376 }
4377 }
4378 if (c == '.') {
4379 prec = 0;
4380 if (--fmtcnt >= 0)
4381 c = *fmt++;
4382 if (c == '*') {
4383 v = getnextarg(args, arglen, &argidx);
4384 if (v == NULL)
4385 goto error;
4386 if (!PyInt_Check(v)) {
4387 PyErr_SetString(
4388 PyExc_TypeError,
4389 "* wants int");
4390 goto error;
4391 }
4392 prec = PyInt_AsLong(v);
4393 if (prec < 0)
4394 prec = 0;
4395 if (--fmtcnt >= 0)
4396 c = *fmt++;
4397 }
4398 else if (c >= 0 && isdigit(c)) {
4399 prec = c - '0';
4400 while (--fmtcnt >= 0) {
4401 c = Py_CHARMASK(*fmt++);
4402 if (!isdigit(c))
4403 break;
4404 if ((prec*10) / 10 != prec) {
4405 PyErr_SetString(
4406 PyExc_ValueError,
4407 "prec too big");
4408 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004409 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004410 prec = prec*10 + (c - '0');
4411 }
4412 }
4413 } /* prec */
4414 if (fmtcnt >= 0) {
4415 if (c == 'h' || c == 'l' || c == 'L') {
4416 if (--fmtcnt >= 0)
4417 c = *fmt++;
4418 }
4419 }
4420 if (fmtcnt < 0) {
4421 PyErr_SetString(PyExc_ValueError,
4422 "incomplete format");
4423 goto error;
4424 }
4425 if (c != '%') {
4426 v = getnextarg(args, arglen, &argidx);
4427 if (v == NULL)
4428 goto error;
4429 }
4430 sign = 0;
4431 fill = ' ';
4432 switch (c) {
4433 case '%':
4434 pbuf = "%";
4435 len = 1;
4436 break;
4437 case 's':
4438#ifdef Py_USING_UNICODE
4439 if (PyUnicode_Check(v)) {
4440 fmt = fmt_start;
4441 argidx = argidx_start;
4442 goto unicode;
4443 }
4444#endif
4445 temp = _PyObject_Str(v);
4446#ifdef Py_USING_UNICODE
4447 if (temp != NULL && PyUnicode_Check(temp)) {
4448 Py_DECREF(temp);
4449 fmt = fmt_start;
4450 argidx = argidx_start;
4451 goto unicode;
4452 }
4453#endif
4454 /* Fall through */
4455 case 'r':
4456 if (c == 'r')
4457 temp = PyObject_Repr(v);
4458 if (temp == NULL)
4459 goto error;
4460 if (!PyString_Check(temp)) {
4461 PyErr_SetString(PyExc_TypeError,
4462 "%s argument has non-string str()");
4463 Py_DECREF(temp);
4464 goto error;
4465 }
4466 pbuf = PyString_AS_STRING(temp);
4467 len = PyString_GET_SIZE(temp);
4468 if (prec >= 0 && len > prec)
4469 len = prec;
4470 break;
4471 case 'i':
4472 case 'd':
4473 case 'u':
4474 case 'o':
4475 case 'x':
4476 case 'X':
4477 if (c == 'i')
4478 c = 'd';
4479 isnumok = 0;
4480 if (PyNumber_Check(v)) {
4481 PyObject *iobj=NULL;
4482
4483 if (PyInt_Check(v) || (PyLong_Check(v))) {
4484 iobj = v;
4485 Py_INCREF(iobj);
4486 }
4487 else {
4488 iobj = PyNumber_Int(v);
4489 if (iobj==NULL) iobj = PyNumber_Long(v);
4490 }
4491 if (iobj!=NULL) {
4492 if (PyInt_Check(iobj)) {
4493 isnumok = 1;
4494 pbuf = formatbuf;
4495 len = formatint(pbuf,
4496 sizeof(formatbuf),
4497 flags, prec, c, iobj);
4498 Py_DECREF(iobj);
4499 if (len < 0)
4500 goto error;
4501 sign = 1;
4502 }
4503 else if (PyLong_Check(iobj)) {
4504 int ilen;
4505
4506 isnumok = 1;
4507 temp = _PyString_FormatLong(iobj, flags,
4508 prec, c, &pbuf, &ilen);
4509 Py_DECREF(iobj);
4510 len = ilen;
4511 if (!temp)
4512 goto error;
4513 sign = 1;
4514 }
4515 else {
4516 Py_DECREF(iobj);
4517 }
4518 }
4519 }
4520 if (!isnumok) {
4521 PyErr_Format(PyExc_TypeError,
4522 "%%%c format: a number is required, "
4523 "not %.200s", c, Py_TYPE(v)->tp_name);
4524 goto error;
4525 }
4526 if (flags & F_ZERO)
4527 fill = '0';
4528 break;
4529 case 'e':
4530 case 'E':
4531 case 'f':
4532 case 'F':
4533 case 'g':
4534 case 'G':
4535 temp = formatfloat(v, flags, prec, c);
4536 if (temp == NULL)
4537 goto error;
4538 pbuf = PyString_AS_STRING(temp);
4539 len = PyString_GET_SIZE(temp);
4540 sign = 1;
4541 if (flags & F_ZERO)
4542 fill = '0';
4543 break;
4544 case 'c':
4545#ifdef Py_USING_UNICODE
4546 if (PyUnicode_Check(v)) {
4547 fmt = fmt_start;
4548 argidx = argidx_start;
4549 goto unicode;
4550 }
4551#endif
4552 pbuf = formatbuf;
4553 len = formatchar(pbuf, sizeof(formatbuf), v);
4554 if (len < 0)
4555 goto error;
4556 break;
4557 default:
4558 PyErr_Format(PyExc_ValueError,
4559 "unsupported format character '%c' (0x%x) "
4560 "at index %zd",
4561 c, c,
4562 (Py_ssize_t)(fmt - 1 -
4563 PyString_AsString(format)));
4564 goto error;
4565 }
4566 if (sign) {
4567 if (*pbuf == '-' || *pbuf == '+') {
4568 sign = *pbuf++;
4569 len--;
4570 }
4571 else if (flags & F_SIGN)
4572 sign = '+';
4573 else if (flags & F_BLANK)
4574 sign = ' ';
4575 else
4576 sign = 0;
4577 }
4578 if (width < len)
4579 width = len;
4580 if (rescnt - (sign != 0) < width) {
4581 reslen -= rescnt;
4582 rescnt = width + fmtcnt + 100;
4583 reslen += rescnt;
4584 if (reslen < 0) {
4585 Py_DECREF(result);
4586 Py_XDECREF(temp);
4587 return PyErr_NoMemory();
4588 }
4589 if (_PyString_Resize(&result, reslen)) {
4590 Py_XDECREF(temp);
4591 return NULL;
4592 }
4593 res = PyString_AS_STRING(result)
4594 + reslen - rescnt;
4595 }
4596 if (sign) {
4597 if (fill != ' ')
4598 *res++ = sign;
4599 rescnt--;
4600 if (width > len)
4601 width--;
4602 }
4603 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4604 assert(pbuf[0] == '0');
4605 assert(pbuf[1] == c);
4606 if (fill != ' ') {
4607 *res++ = *pbuf++;
4608 *res++ = *pbuf++;
4609 }
4610 rescnt -= 2;
4611 width -= 2;
4612 if (width < 0)
4613 width = 0;
4614 len -= 2;
4615 }
4616 if (width > len && !(flags & F_LJUST)) {
4617 do {
4618 --rescnt;
4619 *res++ = fill;
4620 } while (--width > len);
4621 }
4622 if (fill == ' ') {
4623 if (sign)
4624 *res++ = sign;
4625 if ((flags & F_ALT) &&
4626 (c == 'x' || c == 'X')) {
4627 assert(pbuf[0] == '0');
4628 assert(pbuf[1] == c);
4629 *res++ = *pbuf++;
4630 *res++ = *pbuf++;
4631 }
4632 }
4633 Py_MEMCPY(res, pbuf, len);
4634 res += len;
4635 rescnt -= len;
4636 while (--width >= len) {
4637 --rescnt;
4638 *res++ = ' ';
4639 }
4640 if (dict && (argidx < arglen) && c != '%') {
4641 PyErr_SetString(PyExc_TypeError,
4642 "not all arguments converted during string formatting");
4643 Py_XDECREF(temp);
4644 goto error;
4645 }
4646 Py_XDECREF(temp);
4647 } /* '%' */
4648 } /* until end */
4649 if (argidx < arglen && !dict) {
4650 PyErr_SetString(PyExc_TypeError,
4651 "not all arguments converted during string formatting");
4652 goto error;
4653 }
4654 if (args_owned) {
4655 Py_DECREF(args);
4656 }
4657 if (_PyString_Resize(&result, reslen - rescnt))
4658 return NULL;
4659 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004660
4661#ifdef Py_USING_UNICODE
4662 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004663 if (args_owned) {
4664 Py_DECREF(args);
4665 args_owned = 0;
4666 }
4667 /* Fiddle args right (remove the first argidx arguments) */
4668 if (PyTuple_Check(orig_args) && argidx > 0) {
4669 PyObject *v;
4670 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4671 v = PyTuple_New(n);
4672 if (v == NULL)
4673 goto error;
4674 while (--n >= 0) {
4675 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4676 Py_INCREF(w);
4677 PyTuple_SET_ITEM(v, n, w);
4678 }
4679 args = v;
4680 } else {
4681 Py_INCREF(orig_args);
4682 args = orig_args;
4683 }
4684 args_owned = 1;
4685 /* Take what we have of the result and let the Unicode formatting
4686 function format the rest of the input. */
4687 rescnt = res - PyString_AS_STRING(result);
4688 if (_PyString_Resize(&result, rescnt))
4689 goto error;
4690 fmtcnt = PyString_GET_SIZE(format) - \
4691 (fmt - PyString_AS_STRING(format));
4692 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4693 if (format == NULL)
4694 goto error;
4695 v = PyUnicode_Format(format, args);
4696 Py_DECREF(format);
4697 if (v == NULL)
4698 goto error;
4699 /* Paste what we have (result) to what the Unicode formatting
4700 function returned (v) and return the result (or error) */
4701 w = PyUnicode_Concat(result, v);
4702 Py_DECREF(result);
4703 Py_DECREF(v);
4704 Py_DECREF(args);
4705 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004706#endif /* Py_USING_UNICODE */
4707
4708 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004709 Py_DECREF(result);
4710 if (args_owned) {
4711 Py_DECREF(args);
4712 }
4713 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004714}
4715
4716void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004717PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004718{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004719 register PyStringObject *s = (PyStringObject *)(*p);
4720 PyObject *t;
4721 if (s == NULL || !PyString_Check(s))
4722 Py_FatalError("PyString_InternInPlace: strings only please!");
4723 /* If it's a string subclass, we don't really know what putting
4724 it in the interned dict might do. */
4725 if (!PyString_CheckExact(s))
4726 return;
4727 if (PyString_CHECK_INTERNED(s))
4728 return;
4729 if (interned == NULL) {
4730 interned = PyDict_New();
4731 if (interned == NULL) {
4732 PyErr_Clear(); /* Don't leave an exception */
4733 return;
4734 }
4735 }
4736 t = PyDict_GetItem(interned, (PyObject *)s);
4737 if (t) {
4738 Py_INCREF(t);
4739 Py_DECREF(*p);
4740 *p = t;
4741 return;
4742 }
Christian Heimes44720832008-05-26 13:01:01 +00004743
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004744 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4745 PyErr_Clear();
4746 return;
4747 }
4748 /* The two references in interned are not counted by refcnt.
4749 The string deallocator will take care of this */
4750 Py_REFCNT(s) -= 2;
4751 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004752}
4753
4754void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004755PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004756{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004757 PyString_InternInPlace(p);
4758 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4759 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4760 Py_INCREF(*p);
4761 }
Christian Heimes44720832008-05-26 13:01:01 +00004762}
4763
4764
4765PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004766PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004767{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004768 PyObject *s = PyString_FromString(cp);
4769 if (s == NULL)
4770 return NULL;
4771 PyString_InternInPlace(&s);
4772 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004773}
4774
4775void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004776PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004777{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004778 int i;
4779 for (i = 0; i < UCHAR_MAX + 1; i++) {
4780 Py_XDECREF(characters[i]);
4781 characters[i] = NULL;
4782 }
4783 Py_XDECREF(nullstring);
4784 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004785}
4786
4787void _Py_ReleaseInternedStrings(void)
4788{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004789 PyObject *keys;
4790 PyStringObject *s;
4791 Py_ssize_t i, n;
4792 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004793
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004794 if (interned == NULL || !PyDict_Check(interned))
4795 return;
4796 keys = PyDict_Keys(interned);
4797 if (keys == NULL || !PyList_Check(keys)) {
4798 PyErr_Clear();
4799 return;
4800 }
Christian Heimes44720832008-05-26 13:01:01 +00004801
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004802 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4803 detector, interned strings are not forcibly deallocated; rather, we
4804 give them their stolen references back, and then clear and DECREF
4805 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004806
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004807 n = PyList_GET_SIZE(keys);
4808 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4809 n);
4810 for (i = 0; i < n; i++) {
4811 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4812 switch (s->ob_sstate) {
4813 case SSTATE_NOT_INTERNED:
4814 /* XXX Shouldn't happen */
4815 break;
4816 case SSTATE_INTERNED_IMMORTAL:
4817 Py_REFCNT(s) += 1;
4818 immortal_size += Py_SIZE(s);
4819 break;
4820 case SSTATE_INTERNED_MORTAL:
4821 Py_REFCNT(s) += 2;
4822 mortal_size += Py_SIZE(s);
4823 break;
4824 default:
4825 Py_FatalError("Inconsistent interned string state.");
4826 }
4827 s->ob_sstate = SSTATE_NOT_INTERNED;
4828 }
4829 fprintf(stderr, "total size of all interned strings: "
4830 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4831 "mortal/immortal\n", mortal_size, immortal_size);
4832 Py_DECREF(keys);
4833 PyDict_Clear(interned);
4834 Py_DECREF(interned);
4835 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004836}