blob: 78713232ee8b80816cc2b044398d895ebf30c17e [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
#ifdef COUNT_ALLOCS
/* Allocation statistics: bumped each time the cached empty string
   (null_strings) or a cached one-byte string (one_strings) is handed out
   instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of shared one-byte strings, indexed by byte value.  An entry stays
   NULL until the corresponding string has been created once. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string, or NULL if it has not been created yet. */
static PyStringObject *nullstring;

/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;

/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyStringObject_SIZE + n bytes.

   Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000036 string containing exactly `size' bytes.
37
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000038 For PyString_FromStringAndSize(), the parameter the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000039 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000040 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000041 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000042 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000044 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
Eli Bendersky72de2052011-03-24 22:38:25 +020051 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000053 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000055*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000624#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000635
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
Christian Heimes44720832008-05-26 13:01:01 +0000653#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000654 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000655#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
Serhiy Storchaka01b3a082013-01-25 23:30:50 +0200729 /* skip \x */
730 if (s < end && isxdigit(Py_CHARMASK(s[0])))
731 s++; /* and a hexdigit */
732 break;
Christian Heimes44720832008-05-26 13:01:01 +0000733#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000734 case 'u':
735 case 'U':
736 case 'N':
737 if (unicode) {
738 PyErr_SetString(PyExc_ValueError,
739 "Unicode escapes not legal "
740 "when Unicode disabled");
741 goto failed;
742 }
Christian Heimes44720832008-05-26 13:01:01 +0000743#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 default:
745 *p++ = '\\';
746 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200747 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000748 UTF-8 bytes may follow. */
749 }
750 }
751 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
752 goto failed;
753 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000754 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000755 Py_DECREF(v);
756 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000757}
758
759/* -------------------------------------------------------------------- */
760/* object api */
761
Christian Heimes1a6387e2008-03-26 12:49:49 +0000762static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000763string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000764{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000765 char *s;
766 Py_ssize_t len;
767 if (PyString_AsStringAndSize(op, &s, &len))
768 return -1;
769 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770}
771
Christian Heimes44720832008-05-26 13:01:01 +0000772static /*const*/ char *
773string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000774{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000775 char *s;
776 Py_ssize_t len;
777 if (PyString_AsStringAndSize(op, &s, &len))
778 return NULL;
779 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780}
781
782Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000783PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000785 if (!PyString_Check(op))
786 return string_getsize(op);
787 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788}
789
Christian Heimes44720832008-05-26 13:01:01 +0000790/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000791PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000793 if (!PyString_Check(op))
794 return string_getbuffer(op);
795 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796}
797
798int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000799PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000800 register char **s,
801 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000802{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000803 if (s == NULL) {
804 PyErr_BadInternalCall();
805 return -1;
806 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000807
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000808 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000809#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000810 if (PyUnicode_Check(obj)) {
811 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812 if (obj == NULL)
813 return -1;
814 }
815 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000816#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000817 {
818 PyErr_Format(PyExc_TypeError,
819 "expected string or Unicode object, "
820 "%.200s found", Py_TYPE(obj)->tp_name);
821 return -1;
822 }
823 }
Christian Heimes44720832008-05-26 13:01:01 +0000824
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000825 *s = PyString_AS_STRING(obj);
826 if (len != NULL)
827 *len = PyString_GET_SIZE(obj);
828 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829 PyErr_SetString(PyExc_TypeError,
830 "expected string without null bytes");
831 return -1;
832 }
833 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000834}
835
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836/* -------------------------------------------------------------------- */
837/* Methods */
838
Christian Heimes44720832008-05-26 13:01:01 +0000839#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000840#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000841
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842#include "stringlib/count.h"
843#include "stringlib/find.h"
844#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000845#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000847#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000848#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000849
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850
851
852static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000853string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 Py_ssize_t i, str_len;
856 char c;
857 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000858
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000859 /* XXX Ought to check for interrupts when writing long strings */
860 if (! PyString_CheckExact(op)) {
861 int ret;
862 /* A str subclass may have its own __str__ method. */
863 op = (PyStringObject *) PyObject_Str((PyObject *)op);
864 if (op == NULL)
865 return -1;
866 ret = string_print(op, fp, flags);
867 Py_DECREF(op);
868 return ret;
869 }
870 if (flags & Py_PRINT_RAW) {
871 char *data = op->ob_sval;
872 Py_ssize_t size = Py_SIZE(op);
873 Py_BEGIN_ALLOW_THREADS
874 while (size > INT_MAX) {
875 /* Very long strings cannot be written atomically.
876 * But don't write exactly INT_MAX bytes at a time
877 * to avoid memory aligment issues.
878 */
879 const int chunk_size = INT_MAX & ~0x3FFF;
880 fwrite(data, 1, chunk_size, fp);
881 data += chunk_size;
882 size -= chunk_size;
883 }
Christian Heimes44720832008-05-26 13:01:01 +0000884#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000885 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000886#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000887 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000888#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 Py_END_ALLOW_THREADS
890 return 0;
891 }
Christian Heimes44720832008-05-26 13:01:01 +0000892
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000893 /* figure out which quote to use; single is preferred */
894 quote = '\'';
895 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896 !memchr(op->ob_sval, '"', Py_SIZE(op)))
897 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000898
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000899 str_len = Py_SIZE(op);
900 Py_BEGIN_ALLOW_THREADS
901 fputc(quote, fp);
902 for (i = 0; i < str_len; i++) {
903 /* Since strings are immutable and the caller should have a
904 reference, accessing the interal buffer should not be an issue
905 with the GIL released. */
906 c = op->ob_sval[i];
907 if (c == quote || c == '\\')
908 fprintf(fp, "\\%c", c);
909 else if (c == '\t')
910 fprintf(fp, "\\t");
911 else if (c == '\n')
912 fprintf(fp, "\\n");
913 else if (c == '\r')
914 fprintf(fp, "\\r");
915 else if (c < ' ' || c >= 0x7f)
916 fprintf(fp, "\\x%02x", c & 0xff);
917 else
918 fputc(c, fp);
919 }
920 fputc(quote, fp);
921 Py_END_ALLOW_THREADS
922 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923}
924
Christian Heimes44720832008-05-26 13:01:01 +0000925PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000926PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000927{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000928 register PyStringObject* op = (PyStringObject*) obj;
929 size_t newsize = 2 + 4 * Py_SIZE(op);
930 PyObject *v;
931 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
932 PyErr_SetString(PyExc_OverflowError,
933 "string is too large to make repr");
934 return NULL;
935 }
936 v = PyString_FromStringAndSize((char *)NULL, newsize);
937 if (v == NULL) {
938 return NULL;
939 }
940 else {
941 register Py_ssize_t i;
942 register char c;
943 register char *p;
944 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000945
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000946 /* figure out which quote to use; single is preferred */
947 quote = '\'';
948 if (smartquotes &&
949 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
950 !memchr(op->ob_sval, '"', Py_SIZE(op)))
951 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000952
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000953 p = PyString_AS_STRING(v);
954 *p++ = quote;
955 for (i = 0; i < Py_SIZE(op); i++) {
956 /* There's at least enough room for a hex escape
957 and a closing quote. */
958 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
959 c = op->ob_sval[i];
960 if (c == quote || c == '\\')
961 *p++ = '\\', *p++ = c;
962 else if (c == '\t')
963 *p++ = '\\', *p++ = 't';
964 else if (c == '\n')
965 *p++ = '\\', *p++ = 'n';
966 else if (c == '\r')
967 *p++ = '\\', *p++ = 'r';
968 else if (c < ' ' || c >= 0x7f) {
969 /* For performance, we don't want to call
970 PyOS_snprintf here (extra layers of
971 function call). */
972 sprintf(p, "\\x%02x", c & 0xff);
973 p += 4;
974 }
975 else
976 *p++ = c;
977 }
978 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
979 *p++ = quote;
980 *p = '\0';
981 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
982 return NULL;
983 return v;
984 }
Christian Heimes44720832008-05-26 13:01:01 +0000985}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000986
987static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000988string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000990 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991}
992
Christian Heimes1a6387e2008-03-26 12:49:49 +0000993static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000994string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000995{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000996 assert(PyString_Check(s));
997 if (PyString_CheckExact(s)) {
998 Py_INCREF(s);
999 return s;
1000 }
1001 else {
1002 /* Subtype -- return genuine string with the same value. */
1003 PyStringObject *t = (PyStringObject *) s;
1004 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1005 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001006}
1007
Christian Heimes44720832008-05-26 13:01:01 +00001008static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001009string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001010{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001011 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001012}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001013
Christian Heimes44720832008-05-26 13:01:01 +00001014static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001015string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001016{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 register Py_ssize_t size;
1018 register PyStringObject *op;
1019 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001020#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001021 if (PyUnicode_Check(bb))
1022 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001023#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001024 if (PyByteArray_Check(bb))
1025 return PyByteArray_Concat((PyObject *)a, bb);
1026 PyErr_Format(PyExc_TypeError,
1027 "cannot concatenate 'str' and '%.200s' objects",
1028 Py_TYPE(bb)->tp_name);
1029 return NULL;
1030 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001031#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001032 /* Optimize cases with empty left or right operand */
1033 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1034 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1035 if (Py_SIZE(a) == 0) {
1036 Py_INCREF(bb);
1037 return bb;
1038 }
1039 Py_INCREF(a);
1040 return (PyObject *)a;
1041 }
1042 size = Py_SIZE(a) + Py_SIZE(b);
1043 /* Check that string sizes are not negative, to prevent an
1044 overflow in cases where we are passed incorrectly-created
1045 strings with negative lengths (due to a bug in other code).
1046 */
1047 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049 PyErr_SetString(PyExc_OverflowError,
1050 "strings are too large to concat");
1051 return NULL;
1052 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001054 /* Inline PyObject_NewVar */
1055 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1056 PyErr_SetString(PyExc_OverflowError,
1057 "strings are too large to concat");
1058 return NULL;
1059 }
1060 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1061 if (op == NULL)
1062 return PyErr_NoMemory();
1063 PyObject_INIT_VAR(op, &PyString_Type, size);
1064 op->ob_shash = -1;
1065 op->ob_sstate = SSTATE_NOT_INTERNED;
1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1068 op->ob_sval[size] = '\0';
1069 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001070#undef b
1071}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001072
Christian Heimes44720832008-05-26 13:01:01 +00001073static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001074string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001075{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001076 register Py_ssize_t i;
1077 register Py_ssize_t j;
1078 register Py_ssize_t size;
1079 register PyStringObject *op;
1080 size_t nbytes;
1081 if (n < 0)
1082 n = 0;
1083 /* watch out for overflows: the size can overflow int,
1084 * and the # of bytes needed can overflow size_t
1085 */
1086 size = Py_SIZE(a) * n;
1087 if (n && size / n != Py_SIZE(a)) {
1088 PyErr_SetString(PyExc_OverflowError,
1089 "repeated string is too long");
1090 return NULL;
1091 }
1092 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1093 Py_INCREF(a);
1094 return (PyObject *)a;
1095 }
1096 nbytes = (size_t)size;
1097 if (nbytes + PyStringObject_SIZE <= nbytes) {
1098 PyErr_SetString(PyExc_OverflowError,
1099 "repeated string is too long");
1100 return NULL;
1101 }
1102 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1103 if (op == NULL)
1104 return PyErr_NoMemory();
1105 PyObject_INIT_VAR(op, &PyString_Type, size);
1106 op->ob_shash = -1;
1107 op->ob_sstate = SSTATE_NOT_INTERNED;
1108 op->ob_sval[size] = '\0';
1109 if (Py_SIZE(a) == 1 && n > 0) {
1110 memset(op->ob_sval, a->ob_sval[0] , n);
1111 return (PyObject *) op;
1112 }
1113 i = 0;
1114 if (i < size) {
1115 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1116 i = Py_SIZE(a);
1117 }
1118 while (i < size) {
1119 j = (i <= size-i) ? i : size-i;
1120 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1121 i += j;
1122 }
1123 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001124}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001125
Christian Heimes44720832008-05-26 13:01:01 +00001126/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127
1128static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001129string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001130 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001131 /* j -- may be negative! */
1132{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001133 if (i < 0)
1134 i = 0;
1135 if (j < 0)
1136 j = 0; /* Avoid signed/unsigned bug in next line */
1137 if (j > Py_SIZE(a))
1138 j = Py_SIZE(a);
1139 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1140 /* It's the same as a */
1141 Py_INCREF(a);
1142 return (PyObject *)a;
1143 }
1144 if (j < i)
1145 j = i;
1146 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001147}
1148
1149static int
1150string_contains(PyObject *str_obj, PyObject *sub_obj)
1151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001152 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001153#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001154 if (PyUnicode_Check(sub_obj))
1155 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001156#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001157 if (!PyString_Check(sub_obj)) {
1158 PyErr_Format(PyExc_TypeError,
1159 "'in <string>' requires string as left operand, "
1160 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1161 return -1;
1162 }
1163 }
Christian Heimes44720832008-05-26 13:01:01 +00001164
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001165 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001166}
1167
1168static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001169string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001170{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001171 char pchar;
1172 PyObject *v;
1173 if (i < 0 || i >= Py_SIZE(a)) {
1174 PyErr_SetString(PyExc_IndexError, "string index out of range");
1175 return NULL;
1176 }
1177 pchar = a->ob_sval[i];
1178 v = (PyObject *)characters[pchar & UCHAR_MAX];
1179 if (v == NULL)
1180 v = PyString_FromStringAndSize(&pchar, 1);
1181 else {
Christian Heimes44720832008-05-26 13:01:01 +00001182#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001183 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001185 Py_INCREF(v);
1186 }
1187 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001188}
1189
1190static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001191string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001192{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001193 int c;
1194 Py_ssize_t len_a, len_b;
1195 Py_ssize_t min_len;
1196 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001197
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001198 /* Make sure both arguments are strings. */
1199 if (!(PyString_Check(a) && PyString_Check(b))) {
1200 result = Py_NotImplemented;
1201 goto out;
1202 }
1203 if (a == b) {
1204 switch (op) {
1205 case Py_EQ:case Py_LE:case Py_GE:
1206 result = Py_True;
1207 goto out;
1208 case Py_NE:case Py_LT:case Py_GT:
1209 result = Py_False;
1210 goto out;
1211 }
1212 }
1213 if (op == Py_EQ) {
1214 /* Supporting Py_NE here as well does not save
1215 much time, since Py_NE is rarely used. */
1216 if (Py_SIZE(a) == Py_SIZE(b)
1217 && (a->ob_sval[0] == b->ob_sval[0]
1218 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1219 result = Py_True;
1220 } else {
1221 result = Py_False;
1222 }
1223 goto out;
1224 }
1225 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1226 min_len = (len_a < len_b) ? len_a : len_b;
1227 if (min_len > 0) {
1228 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1229 if (c==0)
1230 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1231 } else
1232 c = 0;
1233 if (c == 0)
1234 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1235 switch (op) {
1236 case Py_LT: c = c < 0; break;
1237 case Py_LE: c = c <= 0; break;
1238 case Py_EQ: assert(0); break; /* unreachable */
1239 case Py_NE: c = c != 0; break;
1240 case Py_GT: c = c > 0; break;
1241 case Py_GE: c = c >= 0; break;
1242 default:
1243 result = Py_NotImplemented;
1244 goto out;
1245 }
1246 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001247 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001248 Py_INCREF(result);
1249 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001250}
1251
1252int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001253_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001254{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001255 PyStringObject *a = (PyStringObject*) o1;
1256 PyStringObject *b = (PyStringObject*) o2;
1257 return Py_SIZE(a) == Py_SIZE(b)
1258 && *a->ob_sval == *b->ob_sval
1259 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001260}
1261
1262static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001263string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001264{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001265 register Py_ssize_t len;
1266 register unsigned char *p;
1267 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001268
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001269#ifdef Py_DEBUG
Benjamin Peterson26da9202012-02-21 11:08:50 -05001270 assert(_Py_HashSecret_Initialized);
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001271#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001272 if (a->ob_shash != -1)
1273 return a->ob_shash;
1274 len = Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001275 /*
1276 We make the hash of the empty string be 0, rather than using
1277 (prefix ^ suffix), since this slightly obfuscates the hash secret
1278 */
1279 if (len == 0) {
1280 a->ob_shash = 0;
1281 return 0;
1282 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001283 p = (unsigned char *) a->ob_sval;
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001284 x = _Py_HashSecret.prefix;
1285 x ^= *p << 7;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001286 while (--len >= 0)
1287 x = (1000003*x) ^ *p++;
1288 x ^= Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001289 x ^= _Py_HashSecret.suffix;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001290 if (x == -1)
1291 x = -2;
1292 a->ob_shash = x;
1293 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001294}
1295
1296static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001297string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001298{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001299 if (PyIndex_Check(item)) {
1300 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1301 if (i == -1 && PyErr_Occurred())
1302 return NULL;
1303 if (i < 0)
1304 i += PyString_GET_SIZE(self);
1305 return string_item(self, i);
1306 }
1307 else if (PySlice_Check(item)) {
1308 Py_ssize_t start, stop, step, slicelength, cur, i;
1309 char* source_buf;
1310 char* result_buf;
1311 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001312
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001313 if (PySlice_GetIndicesEx((PySliceObject*)item,
1314 PyString_GET_SIZE(self),
1315 &start, &stop, &step, &slicelength) < 0) {
1316 return NULL;
1317 }
Christian Heimes44720832008-05-26 13:01:01 +00001318
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001319 if (slicelength <= 0) {
1320 return PyString_FromStringAndSize("", 0);
1321 }
1322 else if (start == 0 && step == 1 &&
1323 slicelength == PyString_GET_SIZE(self) &&
1324 PyString_CheckExact(self)) {
1325 Py_INCREF(self);
1326 return (PyObject *)self;
1327 }
1328 else if (step == 1) {
1329 return PyString_FromStringAndSize(
1330 PyString_AS_STRING(self) + start,
1331 slicelength);
1332 }
1333 else {
1334 source_buf = PyString_AsString((PyObject*)self);
1335 result_buf = (char *)PyMem_Malloc(slicelength);
1336 if (result_buf == NULL)
1337 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001338
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001339 for (cur = start, i = 0; i < slicelength;
1340 cur += step, i++) {
1341 result_buf[i] = source_buf[cur];
1342 }
Christian Heimes44720832008-05-26 13:01:01 +00001343
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001344 result = PyString_FromStringAndSize(result_buf,
1345 slicelength);
1346 PyMem_Free(result_buf);
1347 return result;
1348 }
1349 }
1350 else {
1351 PyErr_Format(PyExc_TypeError,
1352 "string indices must be integers, not %.200s",
1353 Py_TYPE(item)->tp_name);
1354 return NULL;
1355 }
Christian Heimes44720832008-05-26 13:01:01 +00001356}
1357
1358static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001359string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001360{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001361 if ( index != 0 ) {
1362 PyErr_SetString(PyExc_SystemError,
1363 "accessing non-existent string segment");
1364 return -1;
1365 }
1366 *ptr = (void *)self->ob_sval;
1367 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001368}
1369
1370static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001371string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001372{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001373 PyErr_SetString(PyExc_TypeError,
1374 "Cannot use string as modifiable buffer");
1375 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001376}
1377
1378static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001379string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001380{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001381 if ( lenp )
1382 *lenp = Py_SIZE(self);
1383 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001384}
1385
1386static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001387string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001388{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001389 if ( index != 0 ) {
1390 PyErr_SetString(PyExc_SystemError,
1391 "accessing non-existent string segment");
1392 return -1;
1393 }
1394 *ptr = self->ob_sval;
1395 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001396}
1397
1398static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001399string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001400{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001401 return PyBuffer_FillInfo(view, (PyObject*)self,
1402 (void *)self->ob_sval, Py_SIZE(self),
1403 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001404}
1405
1406static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001407 (lenfunc)string_length, /*sq_length*/
1408 (binaryfunc)string_concat, /*sq_concat*/
1409 (ssizeargfunc)string_repeat, /*sq_repeat*/
1410 (ssizeargfunc)string_item, /*sq_item*/
1411 (ssizessizeargfunc)string_slice, /*sq_slice*/
1412 0, /*sq_ass_item*/
1413 0, /*sq_ass_slice*/
1414 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001415};
1416
1417static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001418 (lenfunc)string_length,
1419 (binaryfunc)string_subscript,
1420 0,
Christian Heimes44720832008-05-26 13:01:01 +00001421};
1422
1423static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001424 (readbufferproc)string_buffer_getreadbuf,
1425 (writebufferproc)string_buffer_getwritebuf,
1426 (segcountproc)string_buffer_getsegcount,
1427 (charbufferproc)string_buffer_getcharbuf,
1428 (getbufferproc)string_buffer_getbuffer,
1429 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001430};
1431
1432
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001433
/* Strip modes; the values double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode `i': the format string with its three-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1442
Christian Heimes1a6387e2008-03-26 12:49:49 +00001443PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001444"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001445\n\
Christian Heimes44720832008-05-26 13:01:01 +00001446Return a list of the words in the string S, using sep as the\n\
1447delimiter string. If maxsplit is given, at most maxsplit\n\
1448splits are done. If sep is not specified or is None, any\n\
1449whitespace string is a separator and empty strings are removed\n\
1450from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001451
1452static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001453string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001454{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001455 Py_ssize_t len = PyString_GET_SIZE(self), n;
1456 Py_ssize_t maxsplit = -1;
1457 const char *s = PyString_AS_STRING(self), *sub;
1458 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001459
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001460 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1461 return NULL;
1462 if (maxsplit < 0)
1463 maxsplit = PY_SSIZE_T_MAX;
1464 if (subobj == Py_None)
1465 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1466 if (PyString_Check(subobj)) {
1467 sub = PyString_AS_STRING(subobj);
1468 n = PyString_GET_SIZE(subobj);
1469 }
Christian Heimes44720832008-05-26 13:01:01 +00001470#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001471 else if (PyUnicode_Check(subobj))
1472 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001473#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001474 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1475 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001476
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001477 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001478}
1479
1480PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001481"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001482\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001483Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001484the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001485found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001486
1487static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001488string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001489{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001490 const char *sep;
1491 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001492
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001493 if (PyString_Check(sep_obj)) {
1494 sep = PyString_AS_STRING(sep_obj);
1495 sep_len = PyString_GET_SIZE(sep_obj);
1496 }
Christian Heimes44720832008-05-26 13:01:01 +00001497#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001498 else if (PyUnicode_Check(sep_obj))
1499 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001500#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001501 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1502 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001503
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001504 return stringlib_partition(
1505 (PyObject*) self,
1506 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1507 sep_obj, sep, sep_len
1508 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001509}
1510
1511PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001512"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001513\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001514Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001515the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001516separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001517
1518static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001519string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001520{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001521 const char *sep;
1522 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001523
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001524 if (PyString_Check(sep_obj)) {
1525 sep = PyString_AS_STRING(sep_obj);
1526 sep_len = PyString_GET_SIZE(sep_obj);
1527 }
Christian Heimes44720832008-05-26 13:01:01 +00001528#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001529 else if (PyUnicode_Check(sep_obj))
1530 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001531#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001532 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1533 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001534
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001535 return stringlib_rpartition(
1536 (PyObject*) self,
1537 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1538 sep_obj, sep, sep_len
1539 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001540}
1541
Christian Heimes1a6387e2008-03-26 12:49:49 +00001542PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001543"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001544\n\
Christian Heimes44720832008-05-26 13:01:01 +00001545Return a list of the words in the string S, using sep as the\n\
1546delimiter string, starting at the end of the string and working\n\
1547to the front. If maxsplit is given, at most maxsplit splits are\n\
1548done. If sep is not specified or is None, any whitespace string\n\
1549is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001550
1551static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001552string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001553{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001554 Py_ssize_t len = PyString_GET_SIZE(self), n;
1555 Py_ssize_t maxsplit = -1;
1556 const char *s = PyString_AS_STRING(self), *sub;
1557 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001558
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001559 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1560 return NULL;
1561 if (maxsplit < 0)
1562 maxsplit = PY_SSIZE_T_MAX;
1563 if (subobj == Py_None)
1564 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1565 if (PyString_Check(subobj)) {
1566 sub = PyString_AS_STRING(subobj);
1567 n = PyString_GET_SIZE(subobj);
1568 }
Christian Heimes44720832008-05-26 13:01:01 +00001569#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001570 else if (PyUnicode_Check(subobj))
1571 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001572#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001573 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1574 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001575
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001576 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001577}
1578
1579
1580PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001581"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001582\n\
1583Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001584iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001585
1586static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001587string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001588{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001589 char *sep = PyString_AS_STRING(self);
1590 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1591 PyObject *res = NULL;
1592 char *p;
1593 Py_ssize_t seqlen = 0;
1594 size_t sz = 0;
1595 Py_ssize_t i;
1596 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001597
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001598 seq = PySequence_Fast(orig, "");
1599 if (seq == NULL) {
1600 return NULL;
1601 }
Christian Heimes44720832008-05-26 13:01:01 +00001602
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001603 seqlen = PySequence_Size(seq);
1604 if (seqlen == 0) {
1605 Py_DECREF(seq);
1606 return PyString_FromString("");
1607 }
1608 if (seqlen == 1) {
1609 item = PySequence_Fast_GET_ITEM(seq, 0);
1610 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1611 Py_INCREF(item);
1612 Py_DECREF(seq);
1613 return item;
1614 }
1615 }
Christian Heimes44720832008-05-26 13:01:01 +00001616
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001617 /* There are at least two things to join, or else we have a subclass
1618 * of the builtin types in the sequence.
1619 * Do a pre-pass to figure out the total amount of space we'll
1620 * need (sz), see whether any argument is absurd, and defer to
1621 * the Unicode join if appropriate.
1622 */
1623 for (i = 0; i < seqlen; i++) {
1624 const size_t old_sz = sz;
1625 item = PySequence_Fast_GET_ITEM(seq, i);
1626 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001627#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001628 if (PyUnicode_Check(item)) {
1629 /* Defer to Unicode join.
1630 * CAUTION: There's no gurantee that the
1631 * original sequence can be iterated over
1632 * again, so we must pass seq here.
1633 */
1634 PyObject *result;
1635 result = PyUnicode_Join((PyObject *)self, seq);
1636 Py_DECREF(seq);
1637 return result;
1638 }
Christian Heimes44720832008-05-26 13:01:01 +00001639#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001640 PyErr_Format(PyExc_TypeError,
1641 "sequence item %zd: expected string,"
1642 " %.80s found",
1643 i, Py_TYPE(item)->tp_name);
1644 Py_DECREF(seq);
1645 return NULL;
1646 }
1647 sz += PyString_GET_SIZE(item);
1648 if (i != 0)
1649 sz += seplen;
1650 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1651 PyErr_SetString(PyExc_OverflowError,
1652 "join() result is too long for a Python string");
1653 Py_DECREF(seq);
1654 return NULL;
1655 }
1656 }
Christian Heimes44720832008-05-26 13:01:01 +00001657
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001658 /* Allocate result space. */
1659 res = PyString_FromStringAndSize((char*)NULL, sz);
1660 if (res == NULL) {
1661 Py_DECREF(seq);
1662 return NULL;
1663 }
Christian Heimes44720832008-05-26 13:01:01 +00001664
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001665 /* Catenate everything. */
1666 p = PyString_AS_STRING(res);
1667 for (i = 0; i < seqlen; ++i) {
1668 size_t n;
1669 item = PySequence_Fast_GET_ITEM(seq, i);
1670 n = PyString_GET_SIZE(item);
1671 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1672 p += n;
1673 if (i < seqlen - 1) {
1674 Py_MEMCPY(p, sep, seplen);
1675 p += seplen;
1676 }
1677 }
Christian Heimes44720832008-05-26 13:01:01 +00001678
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001679 Py_DECREF(seq);
1680 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001681}
1682
1683PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001684_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001685{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001686 assert(sep != NULL && PyString_Check(sep));
1687 assert(x != NULL);
1688 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001689}
1690
/* Helper macro to fix up start/end slice values against a length `len`:
 *   - an `end` greater than `len` becomes `len`;
 *   - a negative `end` has `len` added and, if still negative, becomes 0;
 *   - a negative `start` has `len` added and, if still negative, becomes 0.
 * A `start` greater than `len` is left unchanged, so `start` may still
 * exceed `end` afterwards.
 *
 * `start` and `end` are assigned to and must be modifiable lvalues.
 * Every argument is used without parentheses and may be evaluated more
 * than once, and the expansion is two consecutive `if` statements rather
 * than a single statement: pass simple, side-effect-free expressions and
 * do not use the macro as the unbraced body of an if/else or a loop.
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                              \
        end = len;                              \
    else if (end < 0) {                         \
        end += len;                             \
        if (end < 0)                            \
            end = 0;                            \
    }                                           \
    if (start < 0) {                            \
        start += len;                           \
        if (start < 0)                          \
            start = 0;                          \
    }
Christian Heimes44720832008-05-26 13:01:01 +00001705
1706Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001707string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001708{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001709 PyObject *subobj;
1710 const char *sub;
1711 Py_ssize_t sub_len;
1712 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00001713
Jesus Cea44e81682011-04-20 16:39:15 +02001714 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1715 args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001716 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001717
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001718 if (PyString_Check(subobj)) {
1719 sub = PyString_AS_STRING(subobj);
1720 sub_len = PyString_GET_SIZE(subobj);
1721 }
Christian Heimes44720832008-05-26 13:01:01 +00001722#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001723 else if (PyUnicode_Check(subobj))
1724 return PyUnicode_Find(
1725 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001726#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001727 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1728 /* XXX - the "expected a character buffer object" is pretty
1729 confusing for a non-expert. remap to something else ? */
1730 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001731
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001732 if (dir > 0)
1733 return stringlib_find_slice(
1734 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735 sub, sub_len, start, end);
1736 else
1737 return stringlib_rfind_slice(
1738 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1739 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001740}
1741
1742
1743PyDoc_STRVAR(find__doc__,
1744"S.find(sub [,start [,end]]) -> int\n\
1745\n\
1746Return the lowest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001747such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001748arguments start and end are interpreted as in slice notation.\n\
1749\n\
1750Return -1 on failure.");
1751
1752static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001753string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001754{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001755 Py_ssize_t result = string_find_internal(self, args, +1);
1756 if (result == -2)
1757 return NULL;
1758 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001759}
1760
1761
1762PyDoc_STRVAR(index__doc__,
1763"S.index(sub [,start [,end]]) -> int\n\
1764\n\
1765Like S.find() but raise ValueError when the substring is not found.");
1766
1767static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001768string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001769{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001770 Py_ssize_t result = string_find_internal(self, args, +1);
1771 if (result == -2)
1772 return NULL;
1773 if (result == -1) {
1774 PyErr_SetString(PyExc_ValueError,
1775 "substring not found");
1776 return NULL;
1777 }
1778 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001779}
1780
1781
1782PyDoc_STRVAR(rfind__doc__,
1783"S.rfind(sub [,start [,end]]) -> int\n\
1784\n\
1785Return the highest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001786such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001787arguments start and end are interpreted as in slice notation.\n\
1788\n\
1789Return -1 on failure.");
1790
1791static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001792string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001793{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001794 Py_ssize_t result = string_find_internal(self, args, -1);
1795 if (result == -2)
1796 return NULL;
1797 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001798}
1799
1800
1801PyDoc_STRVAR(rindex__doc__,
1802"S.rindex(sub [,start [,end]]) -> int\n\
1803\n\
1804Like S.rfind() but raise ValueError when the substring is not found.");
1805
1806static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001807string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001808{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001809 Py_ssize_t result = string_find_internal(self, args, -1);
1810 if (result == -2)
1811 return NULL;
1812 if (result == -1) {
1813 PyErr_SetString(PyExc_ValueError,
1814 "substring not found");
1815 return NULL;
1816 }
1817 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001818}
1819
1820
1821Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001822do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001823{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001824 char *s = PyString_AS_STRING(self);
1825 Py_ssize_t len = PyString_GET_SIZE(self);
1826 char *sep = PyString_AS_STRING(sepobj);
1827 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1828 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001829
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001830 i = 0;
1831 if (striptype != RIGHTSTRIP) {
1832 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1833 i++;
1834 }
1835 }
Christian Heimes44720832008-05-26 13:01:01 +00001836
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001837 j = len;
1838 if (striptype != LEFTSTRIP) {
1839 do {
1840 j--;
1841 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1842 j++;
1843 }
Christian Heimes44720832008-05-26 13:01:01 +00001844
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001845 if (i == 0 && j == len && PyString_CheckExact(self)) {
1846 Py_INCREF(self);
1847 return (PyObject*)self;
1848 }
1849 else
1850 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001851}
1852
1853
1854Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001855do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001856{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001857 char *s = PyString_AS_STRING(self);
1858 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001859
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001860 i = 0;
1861 if (striptype != RIGHTSTRIP) {
1862 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1863 i++;
1864 }
1865 }
Christian Heimes44720832008-05-26 13:01:01 +00001866
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001867 j = len;
1868 if (striptype != LEFTSTRIP) {
1869 do {
1870 j--;
1871 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1872 j++;
1873 }
Christian Heimes44720832008-05-26 13:01:01 +00001874
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001875 if (i == 0 && j == len && PyString_CheckExact(self)) {
1876 Py_INCREF(self);
1877 return (PyObject*)self;
1878 }
1879 else
1880 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001881}
1882
1883
1884Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001885do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001886{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001887 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001888
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001889 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1890 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001891
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001892 if (sep != NULL && sep != Py_None) {
1893 if (PyString_Check(sep))
1894 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001895#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001896 else if (PyUnicode_Check(sep)) {
1897 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1898 PyObject *res;
1899 if (uniself==NULL)
1900 return NULL;
1901 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1902 striptype, sep);
1903 Py_DECREF(uniself);
1904 return res;
1905 }
Christian Heimes44720832008-05-26 13:01:01 +00001906#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001907 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001908#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001909 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001910#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001911 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001912#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001913 STRIPNAME(striptype));
1914 return NULL;
1915 }
Christian Heimes44720832008-05-26 13:01:01 +00001916
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001917 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001918}
1919
1920
1921PyDoc_STRVAR(strip__doc__,
1922"S.strip([chars]) -> string or unicode\n\
1923\n\
1924Return a copy of the string S with leading and trailing\n\
1925whitespace removed.\n\
1926If chars is given and not None, remove characters in chars instead.\n\
1927If chars is unicode, S will be converted to unicode before stripping");
1928
1929static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001930string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001931{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001932 if (PyTuple_GET_SIZE(args) == 0)
1933 return do_strip(self, BOTHSTRIP); /* Common case */
1934 else
1935 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001936}
1937
1938
1939PyDoc_STRVAR(lstrip__doc__,
1940"S.lstrip([chars]) -> string or unicode\n\
1941\n\
1942Return a copy of the string S with leading whitespace removed.\n\
1943If chars is given and not None, remove characters in chars instead.\n\
1944If chars is unicode, S will be converted to unicode before stripping");
1945
1946static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001947string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001948{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001949 if (PyTuple_GET_SIZE(args) == 0)
1950 return do_strip(self, LEFTSTRIP); /* Common case */
1951 else
1952 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001953}
1954
1955
1956PyDoc_STRVAR(rstrip__doc__,
1957"S.rstrip([chars]) -> string or unicode\n\
1958\n\
1959Return a copy of the string S with trailing whitespace removed.\n\
1960If chars is given and not None, remove characters in chars instead.\n\
1961If chars is unicode, S will be converted to unicode before stripping");
1962
1963static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001964string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001965{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001966 if (PyTuple_GET_SIZE(args) == 0)
1967 return do_strip(self, RIGHTSTRIP); /* Common case */
1968 else
1969 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001970}
1971
1972
1973PyDoc_STRVAR(lower__doc__,
1974"S.lower() -> string\n\
1975\n\
1976Return a copy of the string S converted to lowercase.");
1977
1978/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1979#ifndef _tolower
1980#define _tolower tolower
1981#endif
1982
1983static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001984string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001985{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001986 char *s;
1987 Py_ssize_t i, n = PyString_GET_SIZE(self);
1988 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001989
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001990 newobj = PyString_FromStringAndSize(NULL, n);
1991 if (!newobj)
1992 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001993
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001994 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001995
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001996 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001997
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001998 for (i = 0; i < n; i++) {
1999 int c = Py_CHARMASK(s[i]);
2000 if (isupper(c))
2001 s[i] = _tolower(c);
2002 }
Christian Heimes44720832008-05-26 13:01:01 +00002003
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002004 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002005}
2006
2007PyDoc_STRVAR(upper__doc__,
2008"S.upper() -> string\n\
2009\n\
2010Return a copy of the string S converted to uppercase.");
2011
2012#ifndef _toupper
2013#define _toupper toupper
2014#endif
2015
2016static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002017string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002018{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002019 char *s;
2020 Py_ssize_t i, n = PyString_GET_SIZE(self);
2021 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002022
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002023 newobj = PyString_FromStringAndSize(NULL, n);
2024 if (!newobj)
2025 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002026
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002027 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002028
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002029 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002030
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002031 for (i = 0; i < n; i++) {
2032 int c = Py_CHARMASK(s[i]);
2033 if (islower(c))
2034 s[i] = _toupper(c);
2035 }
Christian Heimes44720832008-05-26 13:01:01 +00002036
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002037 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002038}
2039
2040PyDoc_STRVAR(title__doc__,
2041"S.title() -> string\n\
2042\n\
2043Return a titlecased version of S, i.e. words start with uppercase\n\
2044characters, all remaining cased characters have lowercase.");
2045
2046static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002047string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002048{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002049 char *s = PyString_AS_STRING(self), *s_new;
2050 Py_ssize_t i, n = PyString_GET_SIZE(self);
2051 int previous_is_cased = 0;
2052 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002053
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002054 newobj = PyString_FromStringAndSize(NULL, n);
2055 if (newobj == NULL)
2056 return NULL;
2057 s_new = PyString_AsString(newobj);
2058 for (i = 0; i < n; i++) {
2059 int c = Py_CHARMASK(*s++);
2060 if (islower(c)) {
2061 if (!previous_is_cased)
2062 c = toupper(c);
2063 previous_is_cased = 1;
2064 } else if (isupper(c)) {
2065 if (previous_is_cased)
2066 c = tolower(c);
2067 previous_is_cased = 1;
2068 } else
2069 previous_is_cased = 0;
2070 *s_new++ = c;
2071 }
2072 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002073}
2074
2075PyDoc_STRVAR(capitalize__doc__,
2076"S.capitalize() -> string\n\
2077\n\
2078Return a copy of the string S with only its first character\n\
2079capitalized.");
2080
2081static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002082string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002083{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002084 char *s = PyString_AS_STRING(self), *s_new;
2085 Py_ssize_t i, n = PyString_GET_SIZE(self);
2086 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002087
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002088 newobj = PyString_FromStringAndSize(NULL, n);
2089 if (newobj == NULL)
2090 return NULL;
2091 s_new = PyString_AsString(newobj);
2092 if (0 < n) {
2093 int c = Py_CHARMASK(*s++);
2094 if (islower(c))
2095 *s_new = toupper(c);
2096 else
2097 *s_new = c;
2098 s_new++;
2099 }
2100 for (i = 1; i < n; i++) {
2101 int c = Py_CHARMASK(*s++);
2102 if (isupper(c))
2103 *s_new = tolower(c);
2104 else
2105 *s_new = c;
2106 s_new++;
2107 }
2108 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002109}
2110
2111
2112PyDoc_STRVAR(count__doc__,
2113"S.count(sub[, start[, end]]) -> int\n\
2114\n\
2115Return the number of non-overlapping occurrences of substring sub in\n\
2116string S[start:end]. Optional arguments start and end are interpreted\n\
2117as in slice notation.");
2118
2119static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002120string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002121{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002122 PyObject *sub_obj;
2123 const char *str = PyString_AS_STRING(self), *sub;
2124 Py_ssize_t sub_len;
2125 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002126
Jesus Cea44e81682011-04-20 16:39:15 +02002127 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002128 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002129
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002130 if (PyString_Check(sub_obj)) {
2131 sub = PyString_AS_STRING(sub_obj);
2132 sub_len = PyString_GET_SIZE(sub_obj);
2133 }
Christian Heimes44720832008-05-26 13:01:01 +00002134#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002135 else if (PyUnicode_Check(sub_obj)) {
2136 Py_ssize_t count;
2137 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2138 if (count == -1)
2139 return NULL;
2140 else
2141 return PyInt_FromSsize_t(count);
2142 }
Christian Heimes44720832008-05-26 13:01:01 +00002143#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002144 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2145 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002146
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002147 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002148
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002149 return PyInt_FromSsize_t(
2150 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2151 );
Christian Heimes44720832008-05-26 13:01:01 +00002152}
2153
2154PyDoc_STRVAR(swapcase__doc__,
2155"S.swapcase() -> string\n\
2156\n\
2157Return a copy of the string S with uppercase characters\n\
2158converted to lowercase and vice versa.");
2159
2160static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002161string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002162{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002163 char *s = PyString_AS_STRING(self), *s_new;
2164 Py_ssize_t i, n = PyString_GET_SIZE(self);
2165 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002166
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002167 newobj = PyString_FromStringAndSize(NULL, n);
2168 if (newobj == NULL)
2169 return NULL;
2170 s_new = PyString_AsString(newobj);
2171 for (i = 0; i < n; i++) {
2172 int c = Py_CHARMASK(*s++);
2173 if (islower(c)) {
2174 *s_new = toupper(c);
2175 }
2176 else if (isupper(c)) {
2177 *s_new = tolower(c);
2178 }
2179 else
2180 *s_new = c;
2181 s_new++;
2182 }
2183 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002184}
2185
2186
2187PyDoc_STRVAR(translate__doc__,
2188"S.translate(table [,deletechars]) -> string\n\
2189\n\
2190Return a copy of the string S, where all characters occurring\n\
2191in the optional argument deletechars are removed, and the\n\
2192remaining characters have been mapped through the given\n\
Mark Dickinsoncb9bf1a2011-06-25 11:00:12 +02002193translation table, which must be a string of length 256 or None.\n\
2194If the table argument is None, no translation is applied and\n\
2195the operation simply removes the characters in deletechars.");
Christian Heimes44720832008-05-26 13:01:01 +00002196
2197static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002198string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002199{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002200 register char *input, *output;
2201 const char *table;
2202 register Py_ssize_t i, c, changed = 0;
2203 PyObject *input_obj = (PyObject*)self;
2204 const char *output_start, *del_table=NULL;
2205 Py_ssize_t inlen, tablen, dellen = 0;
2206 PyObject *result;
2207 int trans_table[256];
2208 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002209
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002210 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2211 &tableobj, &delobj))
2212 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002213
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002214 if (PyString_Check(tableobj)) {
2215 table = PyString_AS_STRING(tableobj);
2216 tablen = PyString_GET_SIZE(tableobj);
2217 }
2218 else if (tableobj == Py_None) {
2219 table = NULL;
2220 tablen = 256;
2221 }
Christian Heimes44720832008-05-26 13:01:01 +00002222#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002223 else if (PyUnicode_Check(tableobj)) {
2224 /* Unicode .translate() does not support the deletechars
2225 parameter; instead a mapping to None will cause characters
2226 to be deleted. */
2227 if (delobj != NULL) {
2228 PyErr_SetString(PyExc_TypeError,
2229 "deletions are implemented differently for unicode");
2230 return NULL;
2231 }
2232 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2233 }
Christian Heimes44720832008-05-26 13:01:01 +00002234#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002235 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2236 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002237
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002238 if (tablen != 256) {
2239 PyErr_SetString(PyExc_ValueError,
2240 "translation table must be 256 characters long");
2241 return NULL;
2242 }
Christian Heimes44720832008-05-26 13:01:01 +00002243
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002244 if (delobj != NULL) {
2245 if (PyString_Check(delobj)) {
2246 del_table = PyString_AS_STRING(delobj);
2247 dellen = PyString_GET_SIZE(delobj);
2248 }
Christian Heimes44720832008-05-26 13:01:01 +00002249#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002250 else if (PyUnicode_Check(delobj)) {
2251 PyErr_SetString(PyExc_TypeError,
2252 "deletions are implemented differently for unicode");
2253 return NULL;
2254 }
Christian Heimes44720832008-05-26 13:01:01 +00002255#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002256 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2257 return NULL;
2258 }
2259 else {
2260 del_table = NULL;
2261 dellen = 0;
2262 }
Christian Heimes44720832008-05-26 13:01:01 +00002263
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002264 inlen = PyString_GET_SIZE(input_obj);
2265 result = PyString_FromStringAndSize((char *)NULL, inlen);
2266 if (result == NULL)
2267 return NULL;
2268 output_start = output = PyString_AsString(result);
2269 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002270
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002271 if (dellen == 0 && table != NULL) {
2272 /* If no deletions are required, use faster code */
2273 for (i = inlen; --i >= 0; ) {
2274 c = Py_CHARMASK(*input++);
2275 if (Py_CHARMASK((*output++ = table[c])) != c)
2276 changed = 1;
2277 }
2278 if (changed || !PyString_CheckExact(input_obj))
2279 return result;
2280 Py_DECREF(result);
2281 Py_INCREF(input_obj);
2282 return input_obj;
2283 }
Christian Heimes44720832008-05-26 13:01:01 +00002284
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002285 if (table == NULL) {
2286 for (i = 0; i < 256; i++)
2287 trans_table[i] = Py_CHARMASK(i);
2288 } else {
2289 for (i = 0; i < 256; i++)
2290 trans_table[i] = Py_CHARMASK(table[i]);
2291 }
Christian Heimes44720832008-05-26 13:01:01 +00002292
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002293 for (i = 0; i < dellen; i++)
2294 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002295
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002296 for (i = inlen; --i >= 0; ) {
2297 c = Py_CHARMASK(*input++);
2298 if (trans_table[c] != -1)
2299 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2300 continue;
2301 changed = 1;
2302 }
2303 if (!changed && PyString_CheckExact(input_obj)) {
2304 Py_DECREF(result);
2305 Py_INCREF(input_obj);
2306 return input_obj;
2307 }
2308 /* Fix the size of the resulting string */
2309 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2310 return NULL;
2311 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002312}
2313
2314
/* find and count characters and substrings */

/* findchar(target, target_len, c): pointer to the first byte equal to c
   among the first target_len bytes of target, or NULL when there is none
   (a thin cast around memchr()). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2319
2320/* String ops must return a string. */
2321/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002322Py_LOCAL(PyStringObject *)
2323return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002324{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002325 if (PyString_CheckExact(self)) {
2326 Py_INCREF(self);
2327 return self;
2328 }
2329 return (PyStringObject *)PyString_FromStringAndSize(
2330 PyString_AS_STRING(self),
2331 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002332}
2333
2334Py_LOCAL_INLINE(Py_ssize_t)
2335countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2336{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002337 Py_ssize_t count=0;
2338 const char *start=target;
2339 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002340
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002341 while ( (start=findchar(start, end-start, c)) != NULL ) {
2342 count++;
2343 if (count >= maxcount)
2344 break;
2345 start += 1;
2346 }
2347 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002348}
2349
Christian Heimes44720832008-05-26 13:01:01 +00002350
2351/* Algorithms for different cases of string replacement */
2352
2353/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002354Py_LOCAL(PyStringObject *)
2355replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002356 const char *to_s, Py_ssize_t to_len,
2357 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002358{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002359 char *self_s, *result_s;
2360 Py_ssize_t self_len, result_len;
2361 Py_ssize_t count, i, product;
2362 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002363
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002364 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002365
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002366 /* 1 at the end plus 1 after every character */
2367 count = self_len+1;
2368 if (maxcount < count)
2369 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002370
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002371 /* Check for overflow */
2372 /* result_len = count * to_len + self_len; */
2373 product = count * to_len;
2374 if (product / to_len != count) {
2375 PyErr_SetString(PyExc_OverflowError,
2376 "replace string is too long");
2377 return NULL;
2378 }
2379 result_len = product + self_len;
2380 if (result_len < 0) {
2381 PyErr_SetString(PyExc_OverflowError,
2382 "replace string is too long");
2383 return NULL;
2384 }
Christian Heimes44720832008-05-26 13:01:01 +00002385
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002386 if (! (result = (PyStringObject *)
2387 PyString_FromStringAndSize(NULL, result_len)) )
2388 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002389
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002390 self_s = PyString_AS_STRING(self);
2391 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002392
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002393 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002394
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002395 /* Lay the first one down (guaranteed this will occur) */
2396 Py_MEMCPY(result_s, to_s, to_len);
2397 result_s += to_len;
2398 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002399
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002400 for (i=0; i<count; i++) {
2401 *result_s++ = *self_s++;
2402 Py_MEMCPY(result_s, to_s, to_len);
2403 result_s += to_len;
2404 }
2405
2406 /* Copy the rest of the original string */
2407 Py_MEMCPY(result_s, self_s, self_len-i);
2408
2409 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002410}
2411
2412/* Special case for deleting a single character */
2413/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002414Py_LOCAL(PyStringObject *)
2415replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002416 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002417{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002418 char *self_s, *result_s;
2419 char *start, *next, *end;
2420 Py_ssize_t self_len, result_len;
2421 Py_ssize_t count;
2422 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002423
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002424 self_len = PyString_GET_SIZE(self);
2425 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002426
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002427 count = countchar(self_s, self_len, from_c, maxcount);
2428 if (count == 0) {
2429 return return_self(self);
2430 }
Christian Heimes44720832008-05-26 13:01:01 +00002431
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002432 result_len = self_len - count; /* from_len == 1 */
2433 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002434
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002435 if ( (result = (PyStringObject *)
2436 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2437 return NULL;
2438 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002439
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002440 start = self_s;
2441 end = self_s + self_len;
2442 while (count-- > 0) {
2443 next = findchar(start, end-start, from_c);
2444 if (next == NULL)
2445 break;
2446 Py_MEMCPY(result_s, start, next-start);
2447 result_s += (next-start);
2448 start = next+1;
2449 }
2450 Py_MEMCPY(result_s, start, end-start);
2451
2452 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002453}
2454
2455/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2456
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002457Py_LOCAL(PyStringObject *)
2458replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002459 const char *from_s, Py_ssize_t from_len,
2460 Py_ssize_t maxcount) {
2461 char *self_s, *result_s;
2462 char *start, *next, *end;
2463 Py_ssize_t self_len, result_len;
2464 Py_ssize_t count, offset;
2465 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002466
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002467 self_len = PyString_GET_SIZE(self);
2468 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002469
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002470 count = stringlib_count(self_s, self_len,
2471 from_s, from_len,
2472 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002473
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002474 if (count == 0) {
2475 /* no matches */
2476 return return_self(self);
2477 }
Christian Heimes44720832008-05-26 13:01:01 +00002478
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002479 result_len = self_len - (count * from_len);
2480 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002481
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002482 if ( (result = (PyStringObject *)
2483 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2484 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002485
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002486 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002487
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002488 start = self_s;
2489 end = self_s + self_len;
2490 while (count-- > 0) {
2491 offset = stringlib_find(start, end-start,
2492 from_s, from_len,
2493 0);
2494 if (offset == -1)
2495 break;
2496 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002497
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002498 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002499
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002500 result_s += (next-start);
2501 start = next+from_len;
2502 }
2503 Py_MEMCPY(result_s, start, end-start);
2504 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002505}
2506
2507/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002508Py_LOCAL(PyStringObject *)
2509replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002510 char from_c, char to_c,
2511 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002512{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002513 char *self_s, *result_s, *start, *end, *next;
2514 Py_ssize_t self_len;
2515 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002516
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002517 /* The result string will be the same size */
2518 self_s = PyString_AS_STRING(self);
2519 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002520
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002521 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002522
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002523 if (next == NULL) {
2524 /* No matches; return the original string */
2525 return return_self(self);
2526 }
Christian Heimes44720832008-05-26 13:01:01 +00002527
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002528 /* Need to make a new string */
2529 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2530 if (result == NULL)
2531 return NULL;
2532 result_s = PyString_AS_STRING(result);
2533 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002534
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002535 /* change everything in-place, starting with this one */
2536 start = result_s + (next-self_s);
2537 *start = to_c;
2538 start++;
2539 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002540
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002541 while (--maxcount > 0) {
2542 next = findchar(start, end-start, from_c);
2543 if (next == NULL)
2544 break;
2545 *next = to_c;
2546 start = next+1;
2547 }
Christian Heimes44720832008-05-26 13:01:01 +00002548
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002549 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002550}
2551
2552/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002553Py_LOCAL(PyStringObject *)
2554replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002555 const char *from_s, Py_ssize_t from_len,
2556 const char *to_s, Py_ssize_t to_len,
2557 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002558{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002559 char *result_s, *start, *end;
2560 char *self_s;
2561 Py_ssize_t self_len, offset;
2562 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002563
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002564 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002565
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002566 self_s = PyString_AS_STRING(self);
2567 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002568
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002569 offset = stringlib_find(self_s, self_len,
2570 from_s, from_len,
2571 0);
2572 if (offset == -1) {
2573 /* No matches; return the original string */
2574 return return_self(self);
2575 }
Christian Heimes44720832008-05-26 13:01:01 +00002576
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002577 /* Need to make a new string */
2578 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2579 if (result == NULL)
2580 return NULL;
2581 result_s = PyString_AS_STRING(result);
2582 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002583
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002584 /* change everything in-place, starting with this one */
2585 start = result_s + offset;
2586 Py_MEMCPY(start, to_s, from_len);
2587 start += from_len;
2588 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002589
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002590 while ( --maxcount > 0) {
2591 offset = stringlib_find(start, end-start,
2592 from_s, from_len,
2593 0);
2594 if (offset==-1)
2595 break;
2596 Py_MEMCPY(start+offset, to_s, from_len);
2597 start += offset+from_len;
2598 }
Christian Heimes44720832008-05-26 13:01:01 +00002599
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002600 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002601}
2602
2603/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002604Py_LOCAL(PyStringObject *)
2605replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002606 char from_c,
2607 const char *to_s, Py_ssize_t to_len,
2608 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002609{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002610 char *self_s, *result_s;
2611 char *start, *next, *end;
2612 Py_ssize_t self_len, result_len;
2613 Py_ssize_t count, product;
2614 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002615
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002616 self_s = PyString_AS_STRING(self);
2617 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002618
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002619 count = countchar(self_s, self_len, from_c, maxcount);
2620 if (count == 0) {
2621 /* no matches, return unchanged */
2622 return return_self(self);
2623 }
Christian Heimes44720832008-05-26 13:01:01 +00002624
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002625 /* use the difference between current and new, hence the "-1" */
2626 /* result_len = self_len + count * (to_len-1) */
2627 product = count * (to_len-1);
2628 if (product / (to_len-1) != count) {
2629 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2630 return NULL;
2631 }
2632 result_len = self_len + product;
2633 if (result_len < 0) {
2634 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2635 return NULL;
2636 }
Christian Heimes44720832008-05-26 13:01:01 +00002637
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002638 if ( (result = (PyStringObject *)
2639 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2640 return NULL;
2641 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002642
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002643 start = self_s;
2644 end = self_s + self_len;
2645 while (count-- > 0) {
2646 next = findchar(start, end-start, from_c);
2647 if (next == NULL)
2648 break;
Christian Heimes44720832008-05-26 13:01:01 +00002649
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002650 if (next == start) {
2651 /* replace with the 'to' */
2652 Py_MEMCPY(result_s, to_s, to_len);
2653 result_s += to_len;
2654 start += 1;
2655 } else {
2656 /* copy the unchanged old then the 'to' */
2657 Py_MEMCPY(result_s, start, next-start);
2658 result_s += (next-start);
2659 Py_MEMCPY(result_s, to_s, to_len);
2660 result_s += to_len;
2661 start = next+1;
2662 }
2663 }
2664 /* Copy the remainder of the remaining string */
2665 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002666
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002667 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002668}
2669
2670/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002671Py_LOCAL(PyStringObject *)
2672replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002673 const char *from_s, Py_ssize_t from_len,
2674 const char *to_s, Py_ssize_t to_len,
2675 Py_ssize_t maxcount) {
2676 char *self_s, *result_s;
2677 char *start, *next, *end;
2678 Py_ssize_t self_len, result_len;
2679 Py_ssize_t count, offset, product;
2680 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002681
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002682 self_s = PyString_AS_STRING(self);
2683 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002684
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002685 count = stringlib_count(self_s, self_len,
2686 from_s, from_len,
2687 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002688
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002689 if (count == 0) {
2690 /* no matches, return unchanged */
2691 return return_self(self);
2692 }
Christian Heimes44720832008-05-26 13:01:01 +00002693
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002694 /* Check for overflow */
2695 /* result_len = self_len + count * (to_len-from_len) */
2696 product = count * (to_len-from_len);
2697 if (product / (to_len-from_len) != count) {
2698 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2699 return NULL;
2700 }
2701 result_len = self_len + product;
2702 if (result_len < 0) {
2703 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2704 return NULL;
2705 }
Christian Heimes44720832008-05-26 13:01:01 +00002706
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002707 if ( (result = (PyStringObject *)
2708 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2709 return NULL;
2710 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002711
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002712 start = self_s;
2713 end = self_s + self_len;
2714 while (count-- > 0) {
2715 offset = stringlib_find(start, end-start,
2716 from_s, from_len,
2717 0);
2718 if (offset == -1)
2719 break;
2720 next = start+offset;
2721 if (next == start) {
2722 /* replace with the 'to' */
2723 Py_MEMCPY(result_s, to_s, to_len);
2724 result_s += to_len;
2725 start += from_len;
2726 } else {
2727 /* copy the unchanged old then the 'to' */
2728 Py_MEMCPY(result_s, start, next-start);
2729 result_s += (next-start);
2730 Py_MEMCPY(result_s, to_s, to_len);
2731 result_s += to_len;
2732 start = next+from_len;
2733 }
2734 }
2735 /* Copy the remainder of the remaining string */
2736 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002737
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002738 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002739}
2740
2741
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002742Py_LOCAL(PyStringObject *)
2743replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002744 const char *from_s, Py_ssize_t from_len,
2745 const char *to_s, Py_ssize_t to_len,
2746 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002747{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002748 if (maxcount < 0) {
2749 maxcount = PY_SSIZE_T_MAX;
2750 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2751 /* nothing to do; return the original string */
2752 return return_self(self);
2753 }
Christian Heimes44720832008-05-26 13:01:01 +00002754
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002755 if (maxcount == 0 ||
2756 (from_len == 0 && to_len == 0)) {
2757 /* nothing to do; return the original string */
2758 return return_self(self);
2759 }
Christian Heimes44720832008-05-26 13:01:01 +00002760
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002761 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002762
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002763 if (from_len == 0) {
2764 /* insert the 'to' string everywhere. */
2765 /* >>> "Python".replace("", ".") */
2766 /* '.P.y.t.h.o.n.' */
2767 return replace_interleave(self, to_s, to_len, maxcount);
2768 }
Christian Heimes44720832008-05-26 13:01:01 +00002769
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002770 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2771 /* point for an empty self string to generate a non-empty string */
2772 /* Special case so the remaining code always gets a non-empty string */
2773 if (PyString_GET_SIZE(self) == 0) {
2774 return return_self(self);
2775 }
Christian Heimes44720832008-05-26 13:01:01 +00002776
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002777 if (to_len == 0) {
2778 /* delete all occurances of 'from' string */
2779 if (from_len == 1) {
2780 return replace_delete_single_character(
2781 self, from_s[0], maxcount);
2782 } else {
2783 return replace_delete_substring(self, from_s, from_len, maxcount);
2784 }
2785 }
Christian Heimes44720832008-05-26 13:01:01 +00002786
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002787 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002788
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002789 if (from_len == to_len) {
2790 if (from_len == 1) {
2791 return replace_single_character_in_place(
2792 self,
2793 from_s[0],
2794 to_s[0],
2795 maxcount);
2796 } else {
2797 return replace_substring_in_place(
2798 self, from_s, from_len, to_s, to_len, maxcount);
2799 }
2800 }
Christian Heimes44720832008-05-26 13:01:01 +00002801
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002802 /* Otherwise use the more generic algorithms */
2803 if (from_len == 1) {
2804 return replace_single_character(self, from_s[0],
2805 to_s, to_len, maxcount);
2806 } else {
2807 /* len('from')>=2, len('to')>=1 */
2808 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2809 }
Christian Heimes44720832008-05-26 13:01:01 +00002810}
2811
2812PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002813"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002814\n\
2815Return a copy of string S with all occurrences of substring\n\
2816old replaced by new. If the optional argument count is\n\
2817given, only the first count occurrences are replaced.");
2818
2819static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002820string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002821{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002822 Py_ssize_t count = -1;
2823 PyObject *from, *to;
2824 const char *from_s, *to_s;
2825 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002826
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002827 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2828 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002829
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002830 if (PyString_Check(from)) {
2831 from_s = PyString_AS_STRING(from);
2832 from_len = PyString_GET_SIZE(from);
2833 }
Christian Heimes44720832008-05-26 13:01:01 +00002834#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002835 if (PyUnicode_Check(from))
2836 return PyUnicode_Replace((PyObject *)self,
2837 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002838#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002839 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2840 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002841
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002842 if (PyString_Check(to)) {
2843 to_s = PyString_AS_STRING(to);
2844 to_len = PyString_GET_SIZE(to);
2845 }
Christian Heimes44720832008-05-26 13:01:01 +00002846#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002847 else if (PyUnicode_Check(to))
2848 return PyUnicode_Replace((PyObject *)self,
2849 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002850#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002851 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2852 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002853
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002854 return (PyObject *)replace((PyStringObject *) self,
2855 from_s, from_len,
2856 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002857}
2858
2859/** End DALKE **/
2860
2861/* Matches the end (direction >= 0) or start (direction < 0) of self
2862 * against substr, using the start and end arguments. Returns
2863 * -1 on error, 0 if not found and 1 if found.
2864 */
2865Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002866_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002867 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002868{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002869 Py_ssize_t len = PyString_GET_SIZE(self);
2870 Py_ssize_t slen;
2871 const char* sub;
2872 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002873
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002874 if (PyString_Check(substr)) {
2875 sub = PyString_AS_STRING(substr);
2876 slen = PyString_GET_SIZE(substr);
2877 }
Christian Heimes44720832008-05-26 13:01:01 +00002878#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002879 else if (PyUnicode_Check(substr))
2880 return PyUnicode_Tailmatch((PyObject *)self,
2881 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002882#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002883 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2884 return -1;
2885 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002886
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002887 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002888
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002889 if (direction < 0) {
2890 /* startswith */
2891 if (start+slen > len)
2892 return 0;
2893 } else {
2894 /* endswith */
2895 if (end-start < slen || start > len)
2896 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002897
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002898 if (end-slen > start)
2899 start = end - slen;
2900 }
2901 if (end-start >= slen)
2902 return ! memcmp(str+start, sub, slen);
2903 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002904}
2905
2906
2907PyDoc_STRVAR(startswith__doc__,
2908"S.startswith(prefix[, start[, end]]) -> bool\n\
2909\n\
2910Return True if S starts with the specified prefix, False otherwise.\n\
2911With optional start, test S beginning at that position.\n\
2912With optional end, stop comparing S at that position.\n\
2913prefix can also be a tuple of strings to try.");
2914
2915static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002916string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002917{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002918 Py_ssize_t start = 0;
2919 Py_ssize_t end = PY_SSIZE_T_MAX;
2920 PyObject *subobj;
2921 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002922
Jesus Cea44e81682011-04-20 16:39:15 +02002923 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002924 return NULL;
2925 if (PyTuple_Check(subobj)) {
2926 Py_ssize_t i;
2927 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2928 result = _string_tailmatch(self,
2929 PyTuple_GET_ITEM(subobj, i),
2930 start, end, -1);
2931 if (result == -1)
2932 return NULL;
2933 else if (result) {
2934 Py_RETURN_TRUE;
2935 }
2936 }
2937 Py_RETURN_FALSE;
2938 }
2939 result = _string_tailmatch(self, subobj, start, end, -1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002940 if (result == -1) {
2941 if (PyErr_ExceptionMatches(PyExc_TypeError))
2942 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2943 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002944 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002945 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002946 else
2947 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002948}
2949
2950
2951PyDoc_STRVAR(endswith__doc__,
2952"S.endswith(suffix[, start[, end]]) -> bool\n\
2953\n\
2954Return True if S ends with the specified suffix, False otherwise.\n\
2955With optional start, test S beginning at that position.\n\
2956With optional end, stop comparing S at that position.\n\
2957suffix can also be a tuple of strings to try.");
2958
2959static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002960string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002961{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002962 Py_ssize_t start = 0;
2963 Py_ssize_t end = PY_SSIZE_T_MAX;
2964 PyObject *subobj;
2965 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002966
Jesus Cea44e81682011-04-20 16:39:15 +02002967 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002968 return NULL;
2969 if (PyTuple_Check(subobj)) {
2970 Py_ssize_t i;
2971 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2972 result = _string_tailmatch(self,
2973 PyTuple_GET_ITEM(subobj, i),
2974 start, end, +1);
2975 if (result == -1)
2976 return NULL;
2977 else if (result) {
2978 Py_RETURN_TRUE;
2979 }
2980 }
2981 Py_RETURN_FALSE;
2982 }
2983 result = _string_tailmatch(self, subobj, start, end, +1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002984 if (result == -1) {
2985 if (PyErr_ExceptionMatches(PyExc_TypeError))
2986 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2987 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002988 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002989 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002990 else
2991 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002992}
2993
2994
2995PyDoc_STRVAR(encode__doc__,
2996"S.encode([encoding[,errors]]) -> object\n\
2997\n\
2998Encodes S using the codec registered for encoding. encoding defaults\n\
2999to the default encoding. errors may be given to set a different error\n\
3000handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3001a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3002'xmlcharrefreplace' as well as any other name registered with\n\
3003codecs.register_error that is able to handle UnicodeEncodeErrors.");
3004
3005static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003006string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003007{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003008 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003009 char *encoding = NULL;
3010 char *errors = NULL;
3011 PyObject *v;
3012
Benjamin Peterson332d7212009-09-18 21:14:55 +00003013 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003014 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003015 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003016 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003017 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003018 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003019 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003020 PyErr_Format(PyExc_TypeError,
3021 "encoder did not return a string/unicode object "
3022 "(type=%.400s)",
3023 Py_TYPE(v)->tp_name);
3024 Py_DECREF(v);
3025 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003026 }
3027 return v;
3028
3029 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003030 return NULL;
3031}
3032
Christian Heimes44720832008-05-26 13:01:01 +00003033
3034PyDoc_STRVAR(decode__doc__,
3035"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003036\n\
Christian Heimes44720832008-05-26 13:01:01 +00003037Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003038to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003039handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3040a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003041as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003042able to handle UnicodeDecodeErrors.");
3043
3044static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003045string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003046{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003047 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003048 char *encoding = NULL;
3049 char *errors = NULL;
3050 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003051
Benjamin Peterson332d7212009-09-18 21:14:55 +00003052 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003053 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003054 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003055 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003056 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003057 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003058 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003059 PyErr_Format(PyExc_TypeError,
3060 "decoder did not return a string/unicode object "
3061 "(type=%.400s)",
3062 Py_TYPE(v)->tp_name);
3063 Py_DECREF(v);
3064 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003065 }
3066 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003067
Christian Heimes44720832008-05-26 13:01:01 +00003068 onError:
3069 return NULL;
3070}
3071
3072
3073PyDoc_STRVAR(expandtabs__doc__,
3074"S.expandtabs([tabsize]) -> string\n\
3075\n\
3076Return a copy of S where all tab characters are expanded using spaces.\n\
3077If tabsize is not given, a tab size of 8 characters is assumed.");
3078
3079static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003080string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003081{
3082 const char *e, *p, *qe;
3083 char *q;
3084 Py_ssize_t i, j, incr;
3085 PyObject *u;
3086 int tabsize = 8;
3087
3088 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003089 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003090
3091 /* First pass: determine size of output string */
3092 i = 0; /* chars up to and including most recent \n or \r */
3093 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003094 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3095 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003096 if (*p == '\t') {
3097 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003098 incr = tabsize - (j % tabsize);
3099 if (j > PY_SSIZE_T_MAX - incr)
3100 goto overflow1;
3101 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003102 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003103 }
3104 else {
3105 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003106 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003107 j++;
3108 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003109 if (i > PY_SSIZE_T_MAX - j)
3110 goto overflow1;
3111 i += j;
3112 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003113 }
3114 }
Christian Heimes44720832008-05-26 13:01:01 +00003115
3116 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003117 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003118
3119 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003120 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003121 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003122 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003123
3124 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003125 q = PyString_AS_STRING(u); /* next output char */
3126 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003127
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003128 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003129 if (*p == '\t') {
3130 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003131 i = tabsize - (j % tabsize);
3132 j += i;
3133 while (i--) {
3134 if (q >= qe)
3135 goto overflow2;
3136 *q++ = ' ';
3137 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003138 }
3139 }
3140 else {
3141 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003142 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003143 *q++ = *p;
3144 j++;
3145 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003146 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003147 }
Christian Heimes44720832008-05-26 13:01:01 +00003148
3149 return u;
3150
3151 overflow2:
3152 Py_DECREF(u);
3153 overflow1:
3154 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3155 return NULL;
3156}
3157
3158Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003159pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003160{
3161 PyObject *u;
3162
3163 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003164 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003165 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003166 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003167
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003168 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003169 Py_INCREF(self);
3170 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003171 }
3172
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003173 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003174 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003175 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003176 if (left)
3177 memset(PyString_AS_STRING(u), fill, left);
3178 Py_MEMCPY(PyString_AS_STRING(u) + left,
3179 PyString_AS_STRING(self),
3180 PyString_GET_SIZE(self));
3181 if (right)
3182 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3183 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003184 }
3185
3186 return u;
3187}
3188
3189PyDoc_STRVAR(ljust__doc__,
3190"S.ljust(width[, fillchar]) -> string\n"
3191"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003192"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003193"done using the specified fill character (default is a space).");
3194
3195static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003196string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003197{
3198 Py_ssize_t width;
3199 char fillchar = ' ';
3200
3201 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003202 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003203
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003204 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003205 Py_INCREF(self);
3206 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003207 }
3208
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003209 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003210}
3211
3212
3213PyDoc_STRVAR(rjust__doc__,
3214"S.rjust(width[, fillchar]) -> string\n"
3215"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003216"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003217"done using the specified fill character (default is a space)");
3218
3219static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003220string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003221{
3222 Py_ssize_t width;
3223 char fillchar = ' ';
3224
3225 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003226 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003227
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003228 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003229 Py_INCREF(self);
3230 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003231 }
3232
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003233 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003234}
3235
3236
3237PyDoc_STRVAR(center__doc__,
3238"S.center(width[, fillchar]) -> string\n"
3239"\n"
3240"Return S centered in a string of length width. Padding is\n"
3241"done using the specified fill character (default is a space)");
3242
3243static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003244string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003245{
3246 Py_ssize_t marg, left;
3247 Py_ssize_t width;
3248 char fillchar = ' ';
3249
3250 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003251 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003252
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003253 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003254 Py_INCREF(self);
3255 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003256 }
3257
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003258 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003259 left = marg / 2 + (marg & width & 1);
3260
3261 return pad(self, left, marg - left, fillchar);
3262}
3263
3264PyDoc_STRVAR(zfill__doc__,
3265"S.zfill(width) -> string\n"
3266"\n"
3267"Pad a numeric string S with zeros on the left, to fill a field\n"
3268"of the specified width. The string S is never truncated.");
3269
3270static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003271string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003272{
3273 Py_ssize_t fill;
3274 PyObject *s;
3275 char *p;
3276 Py_ssize_t width;
3277
3278 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003279 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003280
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003281 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003282 if (PyString_CheckExact(self)) {
3283 Py_INCREF(self);
3284 return (PyObject*) self;
3285 }
3286 else
3287 return PyString_FromStringAndSize(
3288 PyString_AS_STRING(self),
3289 PyString_GET_SIZE(self)
3290 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003291 }
3292
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003293 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003294
Christian Heimes44720832008-05-26 13:01:01 +00003295 s = pad(self, fill, 0, '0');
3296
3297 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003298 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003299
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003300 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003301 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003302 /* move sign to beginning of string */
3303 p[0] = p[fill];
3304 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003305 }
3306
3307 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003308}
3309
Christian Heimes44720832008-05-26 13:01:01 +00003310PyDoc_STRVAR(isspace__doc__,
3311"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003312\n\
Christian Heimes44720832008-05-26 13:01:01 +00003313Return True if all characters in S are whitespace\n\
3314and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003315
Christian Heimes44720832008-05-26 13:01:01 +00003316static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003317string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003318{
Christian Heimes44720832008-05-26 13:01:01 +00003319 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003320 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003321 register const unsigned char *e;
3322
3323 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003324 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003325 isspace(*p))
3326 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003327
3328 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003329 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003330 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003331
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003332 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003333 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003334 if (!isspace(*p))
3335 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003336 }
Christian Heimes44720832008-05-26 13:01:01 +00003337 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003338}
3339
Christian Heimes44720832008-05-26 13:01:01 +00003340
3341PyDoc_STRVAR(isalpha__doc__,
3342"S.isalpha() -> bool\n\
3343\n\
3344Return True if all characters in S are alphabetic\n\
3345and there is at least one character in S, False otherwise.");
3346
3347static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003348string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003349{
Christian Heimes44720832008-05-26 13:01:01 +00003350 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003351 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003352 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003353
Christian Heimes44720832008-05-26 13:01:01 +00003354 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003355 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003356 isalpha(*p))
3357 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003358
3359 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003360 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003361 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003362
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003363 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003364 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003365 if (!isalpha(*p))
3366 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003367 }
Christian Heimes44720832008-05-26 13:01:01 +00003368 return PyBool_FromLong(1);
3369}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003370
Christian Heimes44720832008-05-26 13:01:01 +00003371
3372PyDoc_STRVAR(isalnum__doc__,
3373"S.isalnum() -> bool\n\
3374\n\
3375Return True if all characters in S are alphanumeric\n\
3376and there is at least one character in S, False otherwise.");
3377
3378static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003379string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003380{
3381 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003382 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003383 register const unsigned char *e;
3384
3385 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003386 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003387 isalnum(*p))
3388 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003389
3390 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003391 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003392 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003393
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003394 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003395 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003396 if (!isalnum(*p))
3397 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003398 }
3399 return PyBool_FromLong(1);
3400}
3401
3402
3403PyDoc_STRVAR(isdigit__doc__,
3404"S.isdigit() -> bool\n\
3405\n\
3406Return True if all characters in S are digits\n\
3407and there is at least one character in S, False otherwise.");
3408
3409static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003410string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003411{
3412 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003413 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003414 register const unsigned char *e;
3415
3416 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003417 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003418 isdigit(*p))
3419 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003420
3421 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003422 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003423 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003424
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003425 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003426 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003427 if (!isdigit(*p))
3428 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003429 }
3430 return PyBool_FromLong(1);
3431}
3432
3433
3434PyDoc_STRVAR(islower__doc__,
3435"S.islower() -> bool\n\
3436\n\
3437Return True if all cased characters in S are lowercase and there is\n\
3438at least one cased character in S, False otherwise.");
3439
3440static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003441string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003442{
3443 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003444 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003445 register const unsigned char *e;
3446 int cased;
3447
3448 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003449 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003450 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003451
3452 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003453 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003454 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003455
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003456 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003457 cased = 0;
3458 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003459 if (isupper(*p))
3460 return PyBool_FromLong(0);
3461 else if (!cased && islower(*p))
3462 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003463 }
3464 return PyBool_FromLong(cased);
3465}
3466
3467
3468PyDoc_STRVAR(isupper__doc__,
3469"S.isupper() -> bool\n\
3470\n\
3471Return True if all cased characters in S are uppercase and there is\n\
3472at least one cased character in S, False otherwise.");
3473
3474static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003475string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003476{
3477 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003478 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003479 register const unsigned char *e;
3480 int cased;
3481
3482 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003483 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003484 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003485
3486 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003487 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003488 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003489
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003490 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003491 cased = 0;
3492 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003493 if (islower(*p))
3494 return PyBool_FromLong(0);
3495 else if (!cased && isupper(*p))
3496 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003497 }
3498 return PyBool_FromLong(cased);
3499}
3500
3501
3502PyDoc_STRVAR(istitle__doc__,
3503"S.istitle() -> bool\n\
3504\n\
3505Return True if S is a titlecased string and there is at least one\n\
3506character in S, i.e. uppercase characters may only follow uncased\n\
3507characters and lowercase characters only cased ones. Return False\n\
3508otherwise.");
3509
3510static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003511string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003512{
3513 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003514 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003515 register const unsigned char *e;
3516 int cased, previous_is_cased;
3517
3518 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003519 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003520 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003521
3522 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003523 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003524 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003525
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003526 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003527 cased = 0;
3528 previous_is_cased = 0;
3529 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003530 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003531
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003532 if (isupper(ch)) {
3533 if (previous_is_cased)
3534 return PyBool_FromLong(0);
3535 previous_is_cased = 1;
3536 cased = 1;
3537 }
3538 else if (islower(ch)) {
3539 if (!previous_is_cased)
3540 return PyBool_FromLong(0);
3541 previous_is_cased = 1;
3542 cased = 1;
3543 }
3544 else
3545 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003546 }
3547 return PyBool_FromLong(cased);
3548}
3549
3550
3551PyDoc_STRVAR(splitlines__doc__,
Raymond Hettingeraad5b022012-06-02 01:42:58 -04003552"S.splitlines(keepends=False) -> list of strings\n\
Christian Heimes44720832008-05-26 13:01:01 +00003553\n\
3554Return a list of the lines in S, breaking at line boundaries.\n\
3555Line breaks are not included in the resulting list unless keepends\n\
3556is given and true.");
3557
3558static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003559string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003560{
Christian Heimes44720832008-05-26 13:01:01 +00003561 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003562
3563 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003564 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003565
Antoine Pitrou64672132010-01-13 07:55:48 +00003566 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003567 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3568 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003569 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003570}
3571
Robert Schuppenies51df0642008-06-01 16:16:17 +00003572PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003573"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003574
3575static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003576string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003577{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003578 Py_ssize_t res;
3579 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3580 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003581}
3582
Christian Heimes1a6387e2008-03-26 12:49:49 +00003583static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003584string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003585{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003586 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003587}
3588
Christian Heimes1a6387e2008-03-26 12:49:49 +00003589
Christian Heimes44720832008-05-26 13:01:01 +00003590#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003591
Christian Heimes44720832008-05-26 13:01:01 +00003592PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003593"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003594\n\
Eric Smith6c840852010-11-06 19:43:44 +00003595Return a formatted version of S, using substitutions from args and kwargs.\n\
3596The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003597
Eric Smithdc13b792008-05-30 18:10:04 +00003598static PyObject *
3599string__format__(PyObject* self, PyObject* args)
3600{
3601 PyObject *format_spec;
3602 PyObject *result = NULL;
3603 PyObject *tmp = NULL;
3604
3605 /* If 2.x, convert format_spec to the same type as value */
3606 /* This is to allow things like u''.format('') */
3607 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003608 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003609 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003610 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3611 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3612 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003613 }
3614 tmp = PyObject_Str(format_spec);
3615 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003616 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003617 format_spec = tmp;
3618
3619 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003620 PyString_AS_STRING(format_spec),
3621 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003622done:
3623 Py_XDECREF(tmp);
3624 return result;
3625}
3626
Christian Heimes44720832008-05-26 13:01:01 +00003627PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003628"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003629\n\
Eric Smith6c840852010-11-06 19:43:44 +00003630Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003631
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003632
Christian Heimes1a6387e2008-03-26 12:49:49 +00003633static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003634string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003635 /* Counterparts of the obsolete stropmodule functions; except
3636 string.maketrans(). */
3637 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3638 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3639 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3640 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3641 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3642 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3643 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3644 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3645 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3646 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3647 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3648 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3649 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3650 capitalize__doc__},
3651 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3652 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3653 endswith__doc__},
3654 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3655 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3656 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3657 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3658 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3659 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3660 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3661 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3662 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3663 rpartition__doc__},
3664 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3665 startswith__doc__},
3666 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3667 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3668 swapcase__doc__},
3669 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3670 translate__doc__},
3671 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3672 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3673 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3674 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3675 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3676 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3677 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3678 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3679 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3680 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3681 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3682 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3683 expandtabs__doc__},
3684 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3685 splitlines__doc__},
3686 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3687 sizeof__doc__},
3688 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3689 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003690};
3691
3692static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003693str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003694
Christian Heimes44720832008-05-26 13:01:01 +00003695static PyObject *
3696string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3697{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003698 PyObject *x = NULL;
3699 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003700
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003701 if (type != &PyString_Type)
3702 return str_subtype_new(type, args, kwds);
3703 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3704 return NULL;
3705 if (x == NULL)
3706 return PyString_FromString("");
3707 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003708}
3709
3710static PyObject *
3711str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3712{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003713 PyObject *tmp, *pnew;
3714 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003715
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003716 assert(PyType_IsSubtype(type, &PyString_Type));
3717 tmp = string_new(&PyString_Type, args, kwds);
3718 if (tmp == NULL)
3719 return NULL;
3720 assert(PyString_CheckExact(tmp));
3721 n = PyString_GET_SIZE(tmp);
3722 pnew = type->tp_alloc(type, n);
3723 if (pnew != NULL) {
3724 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3725 ((PyStringObject *)pnew)->ob_shash =
3726 ((PyStringObject *)tmp)->ob_shash;
3727 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3728 }
3729 Py_DECREF(tmp);
3730 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003731}
3732
3733static PyObject *
3734basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3735{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003736 PyErr_SetString(PyExc_TypeError,
3737 "The basestring type cannot be instantiated");
3738 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003739}
3740
3741static PyObject *
3742string_mod(PyObject *v, PyObject *w)
3743{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003744 if (!PyString_Check(v)) {
3745 Py_INCREF(Py_NotImplemented);
3746 return Py_NotImplemented;
3747 }
3748 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003749}
3750
3751PyDoc_STRVAR(basestring_doc,
3752"Type basestring cannot be instantiated; it is the base for str and unicode.");
3753
3754static PyNumberMethods string_as_number = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003755 0, /*nb_add*/
3756 0, /*nb_subtract*/
3757 0, /*nb_multiply*/
3758 0, /*nb_divide*/
3759 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00003760};
3761
3762
3763PyTypeObject PyBaseString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003764 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3765 "basestring",
3766 0,
3767 0,
3768 0, /* tp_dealloc */
3769 0, /* tp_print */
3770 0, /* tp_getattr */
3771 0, /* tp_setattr */
3772 0, /* tp_compare */
3773 0, /* tp_repr */
3774 0, /* tp_as_number */
3775 0, /* tp_as_sequence */
3776 0, /* tp_as_mapping */
3777 0, /* tp_hash */
3778 0, /* tp_call */
3779 0, /* tp_str */
3780 0, /* tp_getattro */
3781 0, /* tp_setattro */
3782 0, /* tp_as_buffer */
3783 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3784 basestring_doc, /* tp_doc */
3785 0, /* tp_traverse */
3786 0, /* tp_clear */
3787 0, /* tp_richcompare */
3788 0, /* tp_weaklistoffset */
3789 0, /* tp_iter */
3790 0, /* tp_iternext */
3791 0, /* tp_methods */
3792 0, /* tp_members */
3793 0, /* tp_getset */
3794 &PyBaseObject_Type, /* tp_base */
3795 0, /* tp_dict */
3796 0, /* tp_descr_get */
3797 0, /* tp_descr_set */
3798 0, /* tp_dictoffset */
3799 0, /* tp_init */
3800 0, /* tp_alloc */
3801 basestring_new, /* tp_new */
3802 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003803};
3804
3805PyDoc_STRVAR(string_doc,
Chris Jerdonekad4b0002012-10-07 20:37:54 -07003806"str(object='') -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003807\n\
3808Return a nice string representation of the object.\n\
3809If the argument is a string, the return value is the same object.");
3810
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003811PyTypeObject PyString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003812 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3813 "str",
3814 PyStringObject_SIZE,
3815 sizeof(char),
3816 string_dealloc, /* tp_dealloc */
3817 (printfunc)string_print, /* tp_print */
3818 0, /* tp_getattr */
3819 0, /* tp_setattr */
3820 0, /* tp_compare */
3821 string_repr, /* tp_repr */
3822 &string_as_number, /* tp_as_number */
3823 &string_as_sequence, /* tp_as_sequence */
3824 &string_as_mapping, /* tp_as_mapping */
3825 (hashfunc)string_hash, /* tp_hash */
3826 0, /* tp_call */
3827 string_str, /* tp_str */
3828 PyObject_GenericGetAttr, /* tp_getattro */
3829 0, /* tp_setattro */
3830 &string_as_buffer, /* tp_as_buffer */
3831 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3832 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3833 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3834 string_doc, /* tp_doc */
3835 0, /* tp_traverse */
3836 0, /* tp_clear */
3837 (richcmpfunc)string_richcompare, /* tp_richcompare */
3838 0, /* tp_weaklistoffset */
3839 0, /* tp_iter */
3840 0, /* tp_iternext */
3841 string_methods, /* tp_methods */
3842 0, /* tp_members */
3843 0, /* tp_getset */
3844 &PyBaseString_Type, /* tp_base */
3845 0, /* tp_dict */
3846 0, /* tp_descr_get */
3847 0, /* tp_descr_set */
3848 0, /* tp_dictoffset */
3849 0, /* tp_init */
3850 0, /* tp_alloc */
3851 string_new, /* tp_new */
3852 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003853};
3854
3855void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003856PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003857{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003858 register PyObject *v;
3859 if (*pv == NULL)
3860 return;
3861 if (w == NULL || !PyString_Check(*pv)) {
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02003862 Py_CLEAR(*pv);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003863 return;
3864 }
3865 v = string_concat((PyStringObject *) *pv, w);
3866 Py_DECREF(*pv);
3867 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003868}
3869
3870void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003871PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003872{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003873 PyString_Concat(pv, w);
3874 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003875}
3876
3877
3878/* The following function breaks the notion that strings are immutable:
3879 it changes the size of a string. We get away with this only if there
3880 is only one module referencing the object. You can also think of it
3881 as creating a new string object and destroying the old one, only
3882 more efficiently. In any case, don't use this if the string may
3883 already be known to some other part of the code...
3884 Note that if there's not enough memory to resize the string, the original
3885 string object at *pv is deallocated, *pv is set to NULL, an "out of
3886 memory" exception is set, and -1 is returned. Else (on success) 0 is
3887 returned, and the value in *pv may or may not be the same as on input.
3888 As always, an extra byte is allocated for a trailing \0 byte (newsize
3889 does *not* include that), and a trailing \0 byte is stored.
3890*/
3891
3892int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003893_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003894{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003895 register PyObject *v;
3896 register PyStringObject *sv;
3897 v = *pv;
3898 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3899 PyString_CHECK_INTERNED(v)) {
3900 *pv = 0;
3901 Py_DECREF(v);
3902 PyErr_BadInternalCall();
3903 return -1;
3904 }
3905 /* XXX UNREF/NEWREF interface should be more symmetrical */
3906 _Py_DEC_REFTOTAL;
3907 _Py_ForgetReference(v);
3908 *pv = (PyObject *)
3909 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3910 if (*pv == NULL) {
3911 PyObject_Del(v);
3912 PyErr_NoMemory();
3913 return -1;
3914 }
3915 _Py_NewReference(*pv);
3916 sv = (PyStringObject *) *pv;
3917 Py_SIZE(sv) = newsize;
3918 sv->ob_sval[newsize] = '\0';
3919 sv->ob_shash = -1; /* invalidate cached hash value */
3920 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003921}
3922
3923/* Helpers for formatstring */
3924
3925Py_LOCAL_INLINE(PyObject *)
3926getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3927{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003928 Py_ssize_t argidx = *p_argidx;
3929 if (argidx < arglen) {
3930 (*p_argidx)++;
3931 if (arglen < 0)
3932 return args;
3933 else
3934 return PyTuple_GetItem(args, argidx);
3935 }
3936 PyErr_SetString(PyExc_TypeError,
3937 "not enough arguments for format string");
3938 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003939}
3940
/* Bit flags recording which modifier characters appeared in a format
 * specifier:
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
Christian Heimes44720832008-05-26 13:01:01 +00003953
Mark Dickinson18cfada2009-11-23 18:46:41 +00003954/* Returns a new reference to a PyString object, or NULL on failure. */
3955
3956static PyObject *
3957formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003958{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003959 char *p;
3960 PyObject *result;
3961 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003962
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003963 x = PyFloat_AsDouble(v);
3964 if (x == -1.0 && PyErr_Occurred()) {
3965 PyErr_Format(PyExc_TypeError, "float argument required, "
3966 "not %.200s", Py_TYPE(v)->tp_name);
3967 return NULL;
3968 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003969
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003970 if (prec < 0)
3971 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003972
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003973 p = PyOS_double_to_string(x, type, prec,
3974 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003975
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003976 if (p == NULL)
3977 return NULL;
3978 result = PyString_FromStringAndSize(p, strlen(p));
3979 PyMem_Free(p);
3980 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003981}
3982
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003983/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003984 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3985 * Python's regular ints.
3986 * Return value: a new PyString*, or NULL if error.
3987 * . *pbuf is set to point into it,
3988 * *plen set to the # of chars following that.
3989 * Caller must decref it when done using pbuf.
3990 * The string starting at *pbuf is of the form
3991 * "-"? ("0x" | "0X")? digit+
3992 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3993 * set in flags. The case of hex digits will be correct,
3994 * There will be at least prec digits, zero-filled on the left if
3995 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003996 * val object to be converted
3997 * flags bitmask of format flags; only F_ALT is looked at
3998 * prec minimum number of digits; 0-fill on left if needed
3999 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00004000 *
4001 * CAUTION: o, x and X conversions on regular ints can never
4002 * produce a '-' sign, but can for Python's unbounded ints.
4003 */
4004PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004005_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004006 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00004007{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004008 PyObject *result = NULL;
4009 char *buf;
4010 Py_ssize_t i;
4011 int sign; /* 1 if '-', else 0 */
4012 int len; /* number of characters */
4013 Py_ssize_t llen;
4014 int numdigits; /* len == numnondigits + numdigits */
4015 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004016
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004017 switch (type) {
4018 case 'd':
4019 case 'u':
4020 result = Py_TYPE(val)->tp_str(val);
4021 break;
4022 case 'o':
4023 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4024 break;
4025 case 'x':
4026 case 'X':
4027 numnondigits = 2;
4028 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4029 break;
4030 default:
4031 assert(!"'type' not in [duoxX]");
4032 }
4033 if (!result)
4034 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004035
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004036 buf = PyString_AsString(result);
4037 if (!buf) {
4038 Py_DECREF(result);
4039 return NULL;
4040 }
Christian Heimes44720832008-05-26 13:01:01 +00004041
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004042 /* To modify the string in-place, there can only be one reference. */
4043 if (Py_REFCNT(result) != 1) {
4044 PyErr_BadInternalCall();
4045 return NULL;
4046 }
4047 llen = PyString_Size(result);
4048 if (llen > INT_MAX) {
4049 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4050 return NULL;
4051 }
4052 len = (int)llen;
4053 if (buf[len-1] == 'L') {
4054 --len;
4055 buf[len] = '\0';
4056 }
4057 sign = buf[0] == '-';
4058 numnondigits += sign;
4059 numdigits = len - numnondigits;
4060 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004061
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004062 /* Get rid of base marker unless F_ALT */
4063 if ((flags & F_ALT) == 0) {
4064 /* Need to skip 0x, 0X or 0. */
4065 int skipped = 0;
4066 switch (type) {
4067 case 'o':
4068 assert(buf[sign] == '0');
4069 /* If 0 is only digit, leave it alone. */
4070 if (numdigits > 1) {
4071 skipped = 1;
4072 --numdigits;
4073 }
4074 break;
4075 case 'x':
4076 case 'X':
4077 assert(buf[sign] == '0');
4078 assert(buf[sign + 1] == 'x');
4079 skipped = 2;
4080 numnondigits -= 2;
4081 break;
4082 }
4083 if (skipped) {
4084 buf += skipped;
4085 len -= skipped;
4086 if (sign)
4087 buf[0] = '-';
4088 }
4089 assert(len == numnondigits + numdigits);
4090 assert(numdigits > 0);
4091 }
Christian Heimes44720832008-05-26 13:01:01 +00004092
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004093 /* Fill with leading zeroes to meet minimum width. */
4094 if (prec > numdigits) {
4095 PyObject *r1 = PyString_FromStringAndSize(NULL,
4096 numnondigits + prec);
4097 char *b1;
4098 if (!r1) {
4099 Py_DECREF(result);
4100 return NULL;
4101 }
4102 b1 = PyString_AS_STRING(r1);
4103 for (i = 0; i < numnondigits; ++i)
4104 *b1++ = *buf++;
4105 for (i = 0; i < prec - numdigits; i++)
4106 *b1++ = '0';
4107 for (i = 0; i < numdigits; i++)
4108 *b1++ = *buf++;
4109 *b1 = '\0';
4110 Py_DECREF(result);
4111 result = r1;
4112 buf = PyString_AS_STRING(result);
4113 len = numnondigits + prec;
4114 }
Christian Heimes44720832008-05-26 13:01:01 +00004115
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004116 /* Fix up case for hex conversions. */
4117 if (type == 'X') {
4118 /* Need to convert all lower case letters to upper case.
4119 and need to convert 0x to 0X (and -0x to -0X). */
4120 for (i = 0; i < len; i++)
4121 if (buf[i] >= 'a' && buf[i] <= 'x')
4122 buf[i] -= 'a'-'A';
4123 }
4124 *pbuf = buf;
4125 *plen = len;
4126 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004127}
4128
4129Py_LOCAL_INLINE(int)
4130formatint(char *buf, size_t buflen, int flags,
4131 int prec, int type, PyObject *v)
4132{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004133 /* fmt = '%#.' + `prec` + 'l' + `type`
4134 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4135 + 1 + 1 = 24 */
4136 char fmt[64]; /* plenty big enough! */
4137 char *sign;
4138 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004139
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004140 x = PyInt_AsLong(v);
4141 if (x == -1 && PyErr_Occurred()) {
4142 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4143 Py_TYPE(v)->tp_name);
4144 return -1;
4145 }
4146 if (x < 0 && type == 'u') {
4147 type = 'd';
4148 }
4149 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4150 sign = "-";
4151 else
4152 sign = "";
4153 if (prec < 0)
4154 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004155
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004156 if ((flags & F_ALT) &&
4157 (type == 'x' || type == 'X')) {
4158 /* When converting under %#x or %#X, there are a number
4159 * of issues that cause pain:
4160 * - when 0 is being converted, the C standard leaves off
4161 * the '0x' or '0X', which is inconsistent with other
4162 * %#x/%#X conversions and inconsistent with Python's
4163 * hex() function
4164 * - there are platforms that violate the standard and
4165 * convert 0 with the '0x' or '0X'
4166 * (Metrowerks, Compaq Tru64)
4167 * - there are platforms that give '0x' when converting
4168 * under %#X, but convert 0 in accordance with the
4169 * standard (OS/2 EMX)
4170 *
4171 * We can achieve the desired consistency by inserting our
4172 * own '0x' or '0X' prefix, and substituting %x/%X in place
4173 * of %#x/%#X.
4174 *
4175 * Note that this is the same approach as used in
4176 * formatint() in unicodeobject.c
4177 */
4178 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4179 sign, type, prec, type);
4180 }
4181 else {
4182 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4183 sign, (flags&F_ALT) ? "#" : "",
4184 prec, type);
4185 }
Christian Heimes44720832008-05-26 13:01:01 +00004186
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004187 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4188 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4189 */
4190 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4191 PyErr_SetString(PyExc_OverflowError,
4192 "formatted integer is too long (precision too large?)");
4193 return -1;
4194 }
4195 if (sign[0])
4196 PyOS_snprintf(buf, buflen, fmt, -x);
4197 else
4198 PyOS_snprintf(buf, buflen, fmt, x);
4199 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004200}
4201
4202Py_LOCAL_INLINE(int)
4203formatchar(char *buf, size_t buflen, PyObject *v)
4204{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004205 /* presume that the buffer is at least 2 characters long */
4206 if (PyString_Check(v)) {
4207 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4208 return -1;
4209 }
4210 else {
4211 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4212 return -1;
4213 }
4214 buf[1] = '\0';
4215 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004216}
4217
4218/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4219
Mark Dickinson18cfada2009-11-23 18:46:41 +00004220 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004221 chars are formatted. XXX This is a magic number. Each formatting
4222 routine does bounds checking to ensure no overflow, but a better
4223 solution may be to malloc a buffer of appropriate size for each
4224 format. For now, the current solution is sufficient.
4225*/
4226#define FORMATBUFLEN (size_t)120
4227
4228PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004229PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004230{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004231 char *fmt, *res;
4232 Py_ssize_t arglen, argidx;
4233 Py_ssize_t reslen, rescnt, fmtcnt;
4234 int args_owned = 0;
4235 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004236#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004237 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004238#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004239 PyObject *dict = NULL;
4240 if (format == NULL || !PyString_Check(format) || args == NULL) {
4241 PyErr_BadInternalCall();
4242 return NULL;
4243 }
4244 orig_args = args;
4245 fmt = PyString_AS_STRING(format);
4246 fmtcnt = PyString_GET_SIZE(format);
4247 reslen = rescnt = fmtcnt + 100;
4248 result = PyString_FromStringAndSize((char *)NULL, reslen);
4249 if (result == NULL)
4250 return NULL;
4251 res = PyString_AsString(result);
4252 if (PyTuple_Check(args)) {
4253 arglen = PyTuple_GET_SIZE(args);
4254 argidx = 0;
4255 }
4256 else {
4257 arglen = -1;
4258 argidx = -2;
4259 }
Benjamin Peterson23d49d32012-08-28 17:55:35 -04004260 if (PyMapping_Check(args) && !PyTuple_Check(args) &&
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004261 !PyObject_TypeCheck(args, &PyBaseString_Type))
4262 dict = args;
4263 while (--fmtcnt >= 0) {
4264 if (*fmt != '%') {
4265 if (--rescnt < 0) {
4266 rescnt = fmtcnt + 100;
4267 reslen += rescnt;
4268 if (_PyString_Resize(&result, reslen))
4269 return NULL;
4270 res = PyString_AS_STRING(result)
4271 + reslen - rescnt;
4272 --rescnt;
4273 }
4274 *res++ = *fmt++;
4275 }
4276 else {
4277 /* Got a format specifier */
4278 int flags = 0;
4279 Py_ssize_t width = -1;
4280 int prec = -1;
4281 int c = '\0';
4282 int fill;
4283 int isnumok;
4284 PyObject *v = NULL;
4285 PyObject *temp = NULL;
4286 char *pbuf;
4287 int sign;
4288 Py_ssize_t len;
4289 char formatbuf[FORMATBUFLEN];
4290 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004291#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004292 char *fmt_start = fmt;
4293 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004294#endif
4295
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004296 fmt++;
4297 if (*fmt == '(') {
4298 char *keystart;
4299 Py_ssize_t keylen;
4300 PyObject *key;
4301 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004302
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004303 if (dict == NULL) {
4304 PyErr_SetString(PyExc_TypeError,
4305 "format requires a mapping");
4306 goto error;
4307 }
4308 ++fmt;
4309 --fmtcnt;
4310 keystart = fmt;
4311 /* Skip over balanced parentheses */
4312 while (pcount > 0 && --fmtcnt >= 0) {
4313 if (*fmt == ')')
4314 --pcount;
4315 else if (*fmt == '(')
4316 ++pcount;
4317 fmt++;
4318 }
4319 keylen = fmt - keystart - 1;
4320 if (fmtcnt < 0 || pcount > 0) {
4321 PyErr_SetString(PyExc_ValueError,
4322 "incomplete format key");
4323 goto error;
4324 }
4325 key = PyString_FromStringAndSize(keystart,
4326 keylen);
4327 if (key == NULL)
4328 goto error;
4329 if (args_owned) {
4330 Py_DECREF(args);
4331 args_owned = 0;
4332 }
4333 args = PyObject_GetItem(dict, key);
4334 Py_DECREF(key);
4335 if (args == NULL) {
4336 goto error;
4337 }
4338 args_owned = 1;
4339 arglen = -1;
4340 argidx = -2;
4341 }
4342 while (--fmtcnt >= 0) {
4343 switch (c = *fmt++) {
4344 case '-': flags |= F_LJUST; continue;
4345 case '+': flags |= F_SIGN; continue;
4346 case ' ': flags |= F_BLANK; continue;
4347 case '#': flags |= F_ALT; continue;
4348 case '0': flags |= F_ZERO; continue;
4349 }
4350 break;
4351 }
4352 if (c == '*') {
4353 v = getnextarg(args, arglen, &argidx);
4354 if (v == NULL)
4355 goto error;
4356 if (!PyInt_Check(v)) {
4357 PyErr_SetString(PyExc_TypeError,
4358 "* wants int");
4359 goto error;
4360 }
Serhiy Storchaka926f3a32013-01-19 23:35:46 +02004361 width = PyInt_AsSsize_t(v);
4362 if (width == -1 && PyErr_Occurred())
4363 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004364 if (width < 0) {
4365 flags |= F_LJUST;
4366 width = -width;
4367 }
4368 if (--fmtcnt >= 0)
4369 c = *fmt++;
4370 }
4371 else if (c >= 0 && isdigit(c)) {
4372 width = c - '0';
4373 while (--fmtcnt >= 0) {
4374 c = Py_CHARMASK(*fmt++);
4375 if (!isdigit(c))
4376 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004377 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004378 PyErr_SetString(
4379 PyExc_ValueError,
4380 "width too big");
4381 goto error;
4382 }
4383 width = width*10 + (c - '0');
4384 }
4385 }
4386 if (c == '.') {
4387 prec = 0;
4388 if (--fmtcnt >= 0)
4389 c = *fmt++;
4390 if (c == '*') {
4391 v = getnextarg(args, arglen, &argidx);
4392 if (v == NULL)
4393 goto error;
4394 if (!PyInt_Check(v)) {
4395 PyErr_SetString(
4396 PyExc_TypeError,
4397 "* wants int");
4398 goto error;
4399 }
Serhiy Storchaka926f3a32013-01-19 23:35:46 +02004400 prec = _PyInt_AsInt(v);
4401 if (prec == -1 && PyErr_Occurred())
4402 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004403 if (prec < 0)
4404 prec = 0;
4405 if (--fmtcnt >= 0)
4406 c = *fmt++;
4407 }
4408 else if (c >= 0 && isdigit(c)) {
4409 prec = c - '0';
4410 while (--fmtcnt >= 0) {
4411 c = Py_CHARMASK(*fmt++);
4412 if (!isdigit(c))
4413 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004414 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004415 PyErr_SetString(
4416 PyExc_ValueError,
4417 "prec too big");
4418 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004419 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004420 prec = prec*10 + (c - '0');
4421 }
4422 }
4423 } /* prec */
4424 if (fmtcnt >= 0) {
4425 if (c == 'h' || c == 'l' || c == 'L') {
4426 if (--fmtcnt >= 0)
4427 c = *fmt++;
4428 }
4429 }
4430 if (fmtcnt < 0) {
4431 PyErr_SetString(PyExc_ValueError,
4432 "incomplete format");
4433 goto error;
4434 }
4435 if (c != '%') {
4436 v = getnextarg(args, arglen, &argidx);
4437 if (v == NULL)
4438 goto error;
4439 }
4440 sign = 0;
4441 fill = ' ';
4442 switch (c) {
4443 case '%':
4444 pbuf = "%";
4445 len = 1;
4446 break;
4447 case 's':
4448#ifdef Py_USING_UNICODE
4449 if (PyUnicode_Check(v)) {
4450 fmt = fmt_start;
4451 argidx = argidx_start;
4452 goto unicode;
4453 }
4454#endif
4455 temp = _PyObject_Str(v);
4456#ifdef Py_USING_UNICODE
4457 if (temp != NULL && PyUnicode_Check(temp)) {
4458 Py_DECREF(temp);
4459 fmt = fmt_start;
4460 argidx = argidx_start;
4461 goto unicode;
4462 }
4463#endif
4464 /* Fall through */
4465 case 'r':
4466 if (c == 'r')
4467 temp = PyObject_Repr(v);
4468 if (temp == NULL)
4469 goto error;
4470 if (!PyString_Check(temp)) {
4471 PyErr_SetString(PyExc_TypeError,
4472 "%s argument has non-string str()");
4473 Py_DECREF(temp);
4474 goto error;
4475 }
4476 pbuf = PyString_AS_STRING(temp);
4477 len = PyString_GET_SIZE(temp);
4478 if (prec >= 0 && len > prec)
4479 len = prec;
4480 break;
4481 case 'i':
4482 case 'd':
4483 case 'u':
4484 case 'o':
4485 case 'x':
4486 case 'X':
4487 if (c == 'i')
4488 c = 'd';
4489 isnumok = 0;
4490 if (PyNumber_Check(v)) {
4491 PyObject *iobj=NULL;
4492
4493 if (PyInt_Check(v) || (PyLong_Check(v))) {
4494 iobj = v;
4495 Py_INCREF(iobj);
4496 }
4497 else {
4498 iobj = PyNumber_Int(v);
Benjamin Petersona708adf2013-01-02 12:21:32 -06004499 if (iobj==NULL) {
Benjamin Peterson8f53ded2013-01-02 12:25:15 -06004500 PyErr_Clear();
4501 iobj = PyNumber_Long(v);
4502 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004503 }
4504 if (iobj!=NULL) {
4505 if (PyInt_Check(iobj)) {
4506 isnumok = 1;
4507 pbuf = formatbuf;
4508 len = formatint(pbuf,
4509 sizeof(formatbuf),
4510 flags, prec, c, iobj);
4511 Py_DECREF(iobj);
4512 if (len < 0)
4513 goto error;
4514 sign = 1;
4515 }
4516 else if (PyLong_Check(iobj)) {
4517 int ilen;
4518
4519 isnumok = 1;
4520 temp = _PyString_FormatLong(iobj, flags,
4521 prec, c, &pbuf, &ilen);
4522 Py_DECREF(iobj);
4523 len = ilen;
4524 if (!temp)
4525 goto error;
4526 sign = 1;
4527 }
4528 else {
4529 Py_DECREF(iobj);
4530 }
4531 }
4532 }
4533 if (!isnumok) {
4534 PyErr_Format(PyExc_TypeError,
4535 "%%%c format: a number is required, "
4536 "not %.200s", c, Py_TYPE(v)->tp_name);
4537 goto error;
4538 }
4539 if (flags & F_ZERO)
4540 fill = '0';
4541 break;
4542 case 'e':
4543 case 'E':
4544 case 'f':
4545 case 'F':
4546 case 'g':
4547 case 'G':
4548 temp = formatfloat(v, flags, prec, c);
4549 if (temp == NULL)
4550 goto error;
4551 pbuf = PyString_AS_STRING(temp);
4552 len = PyString_GET_SIZE(temp);
4553 sign = 1;
4554 if (flags & F_ZERO)
4555 fill = '0';
4556 break;
4557 case 'c':
4558#ifdef Py_USING_UNICODE
4559 if (PyUnicode_Check(v)) {
4560 fmt = fmt_start;
4561 argidx = argidx_start;
4562 goto unicode;
4563 }
4564#endif
4565 pbuf = formatbuf;
4566 len = formatchar(pbuf, sizeof(formatbuf), v);
4567 if (len < 0)
4568 goto error;
4569 break;
4570 default:
4571 PyErr_Format(PyExc_ValueError,
4572 "unsupported format character '%c' (0x%x) "
4573 "at index %zd",
4574 c, c,
4575 (Py_ssize_t)(fmt - 1 -
4576 PyString_AsString(format)));
4577 goto error;
4578 }
4579 if (sign) {
4580 if (*pbuf == '-' || *pbuf == '+') {
4581 sign = *pbuf++;
4582 len--;
4583 }
4584 else if (flags & F_SIGN)
4585 sign = '+';
4586 else if (flags & F_BLANK)
4587 sign = ' ';
4588 else
4589 sign = 0;
4590 }
4591 if (width < len)
4592 width = len;
4593 if (rescnt - (sign != 0) < width) {
4594 reslen -= rescnt;
4595 rescnt = width + fmtcnt + 100;
4596 reslen += rescnt;
4597 if (reslen < 0) {
4598 Py_DECREF(result);
4599 Py_XDECREF(temp);
4600 return PyErr_NoMemory();
4601 }
4602 if (_PyString_Resize(&result, reslen)) {
4603 Py_XDECREF(temp);
4604 return NULL;
4605 }
4606 res = PyString_AS_STRING(result)
4607 + reslen - rescnt;
4608 }
4609 if (sign) {
4610 if (fill != ' ')
4611 *res++ = sign;
4612 rescnt--;
4613 if (width > len)
4614 width--;
4615 }
4616 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4617 assert(pbuf[0] == '0');
4618 assert(pbuf[1] == c);
4619 if (fill != ' ') {
4620 *res++ = *pbuf++;
4621 *res++ = *pbuf++;
4622 }
4623 rescnt -= 2;
4624 width -= 2;
4625 if (width < 0)
4626 width = 0;
4627 len -= 2;
4628 }
4629 if (width > len && !(flags & F_LJUST)) {
4630 do {
4631 --rescnt;
4632 *res++ = fill;
4633 } while (--width > len);
4634 }
4635 if (fill == ' ') {
4636 if (sign)
4637 *res++ = sign;
4638 if ((flags & F_ALT) &&
4639 (c == 'x' || c == 'X')) {
4640 assert(pbuf[0] == '0');
4641 assert(pbuf[1] == c);
4642 *res++ = *pbuf++;
4643 *res++ = *pbuf++;
4644 }
4645 }
4646 Py_MEMCPY(res, pbuf, len);
4647 res += len;
4648 rescnt -= len;
4649 while (--width >= len) {
4650 --rescnt;
4651 *res++ = ' ';
4652 }
4653 if (dict && (argidx < arglen) && c != '%') {
4654 PyErr_SetString(PyExc_TypeError,
4655 "not all arguments converted during string formatting");
4656 Py_XDECREF(temp);
4657 goto error;
4658 }
4659 Py_XDECREF(temp);
4660 } /* '%' */
4661 } /* until end */
4662 if (argidx < arglen && !dict) {
4663 PyErr_SetString(PyExc_TypeError,
4664 "not all arguments converted during string formatting");
4665 goto error;
4666 }
4667 if (args_owned) {
4668 Py_DECREF(args);
4669 }
4670 if (_PyString_Resize(&result, reslen - rescnt))
4671 return NULL;
4672 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004673
4674#ifdef Py_USING_UNICODE
4675 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004676 if (args_owned) {
4677 Py_DECREF(args);
4678 args_owned = 0;
4679 }
4680 /* Fiddle args right (remove the first argidx arguments) */
4681 if (PyTuple_Check(orig_args) && argidx > 0) {
4682 PyObject *v;
4683 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4684 v = PyTuple_New(n);
4685 if (v == NULL)
4686 goto error;
4687 while (--n >= 0) {
4688 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4689 Py_INCREF(w);
4690 PyTuple_SET_ITEM(v, n, w);
4691 }
4692 args = v;
4693 } else {
4694 Py_INCREF(orig_args);
4695 args = orig_args;
4696 }
4697 args_owned = 1;
4698 /* Take what we have of the result and let the Unicode formatting
4699 function format the rest of the input. */
4700 rescnt = res - PyString_AS_STRING(result);
4701 if (_PyString_Resize(&result, rescnt))
4702 goto error;
4703 fmtcnt = PyString_GET_SIZE(format) - \
4704 (fmt - PyString_AS_STRING(format));
4705 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4706 if (format == NULL)
4707 goto error;
4708 v = PyUnicode_Format(format, args);
4709 Py_DECREF(format);
4710 if (v == NULL)
4711 goto error;
4712 /* Paste what we have (result) to what the Unicode formatting
4713 function returned (v) and return the result (or error) */
4714 w = PyUnicode_Concat(result, v);
4715 Py_DECREF(result);
4716 Py_DECREF(v);
4717 Py_DECREF(args);
4718 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004719#endif /* Py_USING_UNICODE */
4720
4721 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004722 Py_DECREF(result);
4723 if (args_owned) {
4724 Py_DECREF(args);
4725 }
4726 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004727}
4728
4729void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004730PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004731{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004732 register PyStringObject *s = (PyStringObject *)(*p);
4733 PyObject *t;
4734 if (s == NULL || !PyString_Check(s))
4735 Py_FatalError("PyString_InternInPlace: strings only please!");
4736 /* If it's a string subclass, we don't really know what putting
4737 it in the interned dict might do. */
4738 if (!PyString_CheckExact(s))
4739 return;
4740 if (PyString_CHECK_INTERNED(s))
4741 return;
4742 if (interned == NULL) {
4743 interned = PyDict_New();
4744 if (interned == NULL) {
4745 PyErr_Clear(); /* Don't leave an exception */
4746 return;
4747 }
4748 }
4749 t = PyDict_GetItem(interned, (PyObject *)s);
4750 if (t) {
4751 Py_INCREF(t);
4752 Py_DECREF(*p);
4753 *p = t;
4754 return;
4755 }
Christian Heimes44720832008-05-26 13:01:01 +00004756
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004757 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4758 PyErr_Clear();
4759 return;
4760 }
4761 /* The two references in interned are not counted by refcnt.
4762 The string deallocator will take care of this */
4763 Py_REFCNT(s) -= 2;
4764 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004765}
4766
4767void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004768PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004769{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004770 PyString_InternInPlace(p);
4771 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4772 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4773 Py_INCREF(*p);
4774 }
Christian Heimes44720832008-05-26 13:01:01 +00004775}
4776
4777
4778PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004779PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004780{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004781 PyObject *s = PyString_FromString(cp);
4782 if (s == NULL)
4783 return NULL;
4784 PyString_InternInPlace(&s);
4785 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004786}
4787
4788void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004789PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004790{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004791 int i;
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02004792 for (i = 0; i < UCHAR_MAX + 1; i++)
4793 Py_CLEAR(characters[i]);
4794 Py_CLEAR(nullstring);
Christian Heimes44720832008-05-26 13:01:01 +00004795}
4796
4797void _Py_ReleaseInternedStrings(void)
4798{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004799 PyObject *keys;
4800 PyStringObject *s;
4801 Py_ssize_t i, n;
4802 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004803
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004804 if (interned == NULL || !PyDict_Check(interned))
4805 return;
4806 keys = PyDict_Keys(interned);
4807 if (keys == NULL || !PyList_Check(keys)) {
4808 PyErr_Clear();
4809 return;
4810 }
Christian Heimes44720832008-05-26 13:01:01 +00004811
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004812 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4813 detector, interned strings are not forcibly deallocated; rather, we
4814 give them their stolen references back, and then clear and DECREF
4815 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004816
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004817 n = PyList_GET_SIZE(keys);
4818 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4819 n);
4820 for (i = 0; i < n; i++) {
4821 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4822 switch (s->ob_sstate) {
4823 case SSTATE_NOT_INTERNED:
4824 /* XXX Shouldn't happen */
4825 break;
4826 case SSTATE_INTERNED_IMMORTAL:
4827 Py_REFCNT(s) += 1;
4828 immortal_size += Py_SIZE(s);
4829 break;
4830 case SSTATE_INTERNED_MORTAL:
4831 Py_REFCNT(s) += 2;
4832 mortal_size += Py_SIZE(s);
4833 break;
4834 default:
4835 Py_FatalError("Inconsistent interned string state.");
4836 }
4837 s->ob_sstate = SSTATE_NOT_INTERNED;
4838 }
4839 fprintf(stderr, "total size of all interned strings: "
4840 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4841 "mortal/immortal\n", mortal_size, immortal_size);
4842 Py_DECREF(keys);
4843 PyDict_Clear(interned);
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02004844 Py_CLEAR(interned);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004845}