blob: e1ea3cd80f7c87ebd727554ae9dea218edf9648a [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000036 string containing exactly `size' bytes.
37
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000038 For PyString_FromStringAndSize(), the parameter the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000039 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000040 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000041 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000042 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000044 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
Eli Bendersky72de2052011-03-24 22:38:25 +020051 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000053 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000055*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000624#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000635
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
Christian Heimes44720832008-05-26 13:01:01 +0000653#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000654 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000655#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
Serhiy Storchaka01b3a082013-01-25 23:30:50 +0200729 /* skip \x */
730 if (s < end && isxdigit(Py_CHARMASK(s[0])))
731 s++; /* and a hexdigit */
732 break;
Christian Heimes44720832008-05-26 13:01:01 +0000733#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000734 case 'u':
735 case 'U':
736 case 'N':
737 if (unicode) {
738 PyErr_SetString(PyExc_ValueError,
739 "Unicode escapes not legal "
740 "when Unicode disabled");
741 goto failed;
742 }
Christian Heimes44720832008-05-26 13:01:01 +0000743#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 default:
745 *p++ = '\\';
746 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200747 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000748 UTF-8 bytes may follow. */
749 }
750 }
751 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
752 goto failed;
753 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000754 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000755 Py_DECREF(v);
756 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000757}
758
759/* -------------------------------------------------------------------- */
760/* object api */
761
Christian Heimes1a6387e2008-03-26 12:49:49 +0000762static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000763string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000764{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000765 char *s;
766 Py_ssize_t len;
767 if (PyString_AsStringAndSize(op, &s, &len))
768 return -1;
769 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770}
771
Christian Heimes44720832008-05-26 13:01:01 +0000772static /*const*/ char *
773string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000774{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000775 char *s;
776 Py_ssize_t len;
777 if (PyString_AsStringAndSize(op, &s, &len))
778 return NULL;
779 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780}
781
782Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000783PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000785 if (!PyString_Check(op))
786 return string_getsize(op);
787 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788}
789
Christian Heimes44720832008-05-26 13:01:01 +0000790/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000791PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000793 if (!PyString_Check(op))
794 return string_getbuffer(op);
795 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796}
797
798int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000799PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000800 register char **s,
801 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000802{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000803 if (s == NULL) {
804 PyErr_BadInternalCall();
805 return -1;
806 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000807
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000808 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000809#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000810 if (PyUnicode_Check(obj)) {
811 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812 if (obj == NULL)
813 return -1;
814 }
815 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000816#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000817 {
818 PyErr_Format(PyExc_TypeError,
819 "expected string or Unicode object, "
820 "%.200s found", Py_TYPE(obj)->tp_name);
821 return -1;
822 }
823 }
Christian Heimes44720832008-05-26 13:01:01 +0000824
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000825 *s = PyString_AS_STRING(obj);
826 if (len != NULL)
827 *len = PyString_GET_SIZE(obj);
828 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829 PyErr_SetString(PyExc_TypeError,
830 "expected string without null bytes");
831 return -1;
832 }
833 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000834}
835
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836/* -------------------------------------------------------------------- */
837/* Methods */
838
Christian Heimes44720832008-05-26 13:01:01 +0000839#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000840#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000841
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842#include "stringlib/count.h"
843#include "stringlib/find.h"
844#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000845#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000847#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000848#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000849
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850
851
852static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000853string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 Py_ssize_t i, str_len;
856 char c;
857 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000858
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000859 /* XXX Ought to check for interrupts when writing long strings */
860 if (! PyString_CheckExact(op)) {
861 int ret;
862 /* A str subclass may have its own __str__ method. */
863 op = (PyStringObject *) PyObject_Str((PyObject *)op);
864 if (op == NULL)
865 return -1;
866 ret = string_print(op, fp, flags);
867 Py_DECREF(op);
868 return ret;
869 }
870 if (flags & Py_PRINT_RAW) {
871 char *data = op->ob_sval;
872 Py_ssize_t size = Py_SIZE(op);
873 Py_BEGIN_ALLOW_THREADS
874 while (size > INT_MAX) {
875 /* Very long strings cannot be written atomically.
876 * But don't write exactly INT_MAX bytes at a time
877 * to avoid memory aligment issues.
878 */
879 const int chunk_size = INT_MAX & ~0x3FFF;
880 fwrite(data, 1, chunk_size, fp);
881 data += chunk_size;
882 size -= chunk_size;
883 }
Christian Heimes44720832008-05-26 13:01:01 +0000884#ifdef __VMS
Ronald Oussoren3687e802013-07-11 13:33:55 +0200885 if (size) fwrite(data, (size_t)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000886#else
Ronald Oussoren3687e802013-07-11 13:33:55 +0200887 fwrite(data, 1, (size_t)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000888#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 Py_END_ALLOW_THREADS
890 return 0;
891 }
Christian Heimes44720832008-05-26 13:01:01 +0000892
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000893 /* figure out which quote to use; single is preferred */
894 quote = '\'';
895 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896 !memchr(op->ob_sval, '"', Py_SIZE(op)))
897 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000898
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000899 str_len = Py_SIZE(op);
900 Py_BEGIN_ALLOW_THREADS
901 fputc(quote, fp);
902 for (i = 0; i < str_len; i++) {
903 /* Since strings are immutable and the caller should have a
904 reference, accessing the interal buffer should not be an issue
905 with the GIL released. */
906 c = op->ob_sval[i];
907 if (c == quote || c == '\\')
908 fprintf(fp, "\\%c", c);
909 else if (c == '\t')
910 fprintf(fp, "\\t");
911 else if (c == '\n')
912 fprintf(fp, "\\n");
913 else if (c == '\r')
914 fprintf(fp, "\\r");
915 else if (c < ' ' || c >= 0x7f)
916 fprintf(fp, "\\x%02x", c & 0xff);
917 else
918 fputc(c, fp);
919 }
920 fputc(quote, fp);
921 Py_END_ALLOW_THREADS
922 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923}
924
Christian Heimes44720832008-05-26 13:01:01 +0000925PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000926PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000927{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000928 register PyStringObject* op = (PyStringObject*) obj;
929 size_t newsize = 2 + 4 * Py_SIZE(op);
930 PyObject *v;
931 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
932 PyErr_SetString(PyExc_OverflowError,
933 "string is too large to make repr");
934 return NULL;
935 }
936 v = PyString_FromStringAndSize((char *)NULL, newsize);
937 if (v == NULL) {
938 return NULL;
939 }
940 else {
941 register Py_ssize_t i;
942 register char c;
943 register char *p;
944 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000945
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000946 /* figure out which quote to use; single is preferred */
947 quote = '\'';
948 if (smartquotes &&
949 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
950 !memchr(op->ob_sval, '"', Py_SIZE(op)))
951 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000952
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000953 p = PyString_AS_STRING(v);
954 *p++ = quote;
955 for (i = 0; i < Py_SIZE(op); i++) {
956 /* There's at least enough room for a hex escape
957 and a closing quote. */
958 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
959 c = op->ob_sval[i];
960 if (c == quote || c == '\\')
961 *p++ = '\\', *p++ = c;
962 else if (c == '\t')
963 *p++ = '\\', *p++ = 't';
964 else if (c == '\n')
965 *p++ = '\\', *p++ = 'n';
966 else if (c == '\r')
967 *p++ = '\\', *p++ = 'r';
968 else if (c < ' ' || c >= 0x7f) {
969 /* For performance, we don't want to call
970 PyOS_snprintf here (extra layers of
971 function call). */
972 sprintf(p, "\\x%02x", c & 0xff);
973 p += 4;
974 }
975 else
976 *p++ = c;
977 }
978 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
979 *p++ = quote;
980 *p = '\0';
981 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
982 return NULL;
983 return v;
984 }
Christian Heimes44720832008-05-26 13:01:01 +0000985}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000986
987static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000988string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000990 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991}
992
Christian Heimes1a6387e2008-03-26 12:49:49 +0000993static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000994string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000995{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000996 assert(PyString_Check(s));
997 if (PyString_CheckExact(s)) {
998 Py_INCREF(s);
999 return s;
1000 }
1001 else {
1002 /* Subtype -- return genuine string with the same value. */
1003 PyStringObject *t = (PyStringObject *) s;
1004 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1005 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001006}
1007
Christian Heimes44720832008-05-26 13:01:01 +00001008static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001009string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001010{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001011 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001012}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001013
Christian Heimes44720832008-05-26 13:01:01 +00001014static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001015string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001016{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 register Py_ssize_t size;
1018 register PyStringObject *op;
1019 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001020#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001021 if (PyUnicode_Check(bb))
1022 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001023#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001024 if (PyByteArray_Check(bb))
1025 return PyByteArray_Concat((PyObject *)a, bb);
1026 PyErr_Format(PyExc_TypeError,
1027 "cannot concatenate 'str' and '%.200s' objects",
1028 Py_TYPE(bb)->tp_name);
1029 return NULL;
1030 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001031#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001032 /* Optimize cases with empty left or right operand */
1033 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1034 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1035 if (Py_SIZE(a) == 0) {
1036 Py_INCREF(bb);
1037 return bb;
1038 }
1039 Py_INCREF(a);
1040 return (PyObject *)a;
1041 }
1042 size = Py_SIZE(a) + Py_SIZE(b);
1043 /* Check that string sizes are not negative, to prevent an
1044 overflow in cases where we are passed incorrectly-created
1045 strings with negative lengths (due to a bug in other code).
1046 */
1047 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049 PyErr_SetString(PyExc_OverflowError,
1050 "strings are too large to concat");
1051 return NULL;
1052 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001054 /* Inline PyObject_NewVar */
1055 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1056 PyErr_SetString(PyExc_OverflowError,
1057 "strings are too large to concat");
1058 return NULL;
1059 }
1060 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1061 if (op == NULL)
1062 return PyErr_NoMemory();
1063 PyObject_INIT_VAR(op, &PyString_Type, size);
1064 op->ob_shash = -1;
1065 op->ob_sstate = SSTATE_NOT_INTERNED;
1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1068 op->ob_sval[size] = '\0';
1069 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001070#undef b
1071}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001072
Christian Heimes44720832008-05-26 13:01:01 +00001073static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001074string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001075{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001076 register Py_ssize_t i;
1077 register Py_ssize_t j;
1078 register Py_ssize_t size;
1079 register PyStringObject *op;
1080 size_t nbytes;
1081 if (n < 0)
1082 n = 0;
1083 /* watch out for overflows: the size can overflow int,
1084 * and the # of bytes needed can overflow size_t
1085 */
1086 size = Py_SIZE(a) * n;
1087 if (n && size / n != Py_SIZE(a)) {
1088 PyErr_SetString(PyExc_OverflowError,
1089 "repeated string is too long");
1090 return NULL;
1091 }
1092 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1093 Py_INCREF(a);
1094 return (PyObject *)a;
1095 }
1096 nbytes = (size_t)size;
1097 if (nbytes + PyStringObject_SIZE <= nbytes) {
1098 PyErr_SetString(PyExc_OverflowError,
1099 "repeated string is too long");
1100 return NULL;
1101 }
1102 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1103 if (op == NULL)
1104 return PyErr_NoMemory();
1105 PyObject_INIT_VAR(op, &PyString_Type, size);
1106 op->ob_shash = -1;
1107 op->ob_sstate = SSTATE_NOT_INTERNED;
1108 op->ob_sval[size] = '\0';
1109 if (Py_SIZE(a) == 1 && n > 0) {
1110 memset(op->ob_sval, a->ob_sval[0] , n);
1111 return (PyObject *) op;
1112 }
1113 i = 0;
1114 if (i < size) {
1115 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1116 i = Py_SIZE(a);
1117 }
1118 while (i < size) {
1119 j = (i <= size-i) ? i : size-i;
1120 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1121 i += j;
1122 }
1123 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001124}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001125
Christian Heimes44720832008-05-26 13:01:01 +00001126/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127
1128static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001129string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001130 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001131 /* j -- may be negative! */
1132{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001133 if (i < 0)
1134 i = 0;
1135 if (j < 0)
1136 j = 0; /* Avoid signed/unsigned bug in next line */
1137 if (j > Py_SIZE(a))
1138 j = Py_SIZE(a);
1139 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1140 /* It's the same as a */
1141 Py_INCREF(a);
1142 return (PyObject *)a;
1143 }
1144 if (j < i)
1145 j = i;
1146 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001147}
1148
1149static int
1150string_contains(PyObject *str_obj, PyObject *sub_obj)
1151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001152 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001153#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001154 if (PyUnicode_Check(sub_obj))
1155 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001156#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001157 if (!PyString_Check(sub_obj)) {
1158 PyErr_Format(PyExc_TypeError,
1159 "'in <string>' requires string as left operand, "
1160 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1161 return -1;
1162 }
1163 }
Christian Heimes44720832008-05-26 13:01:01 +00001164
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001165 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001166}
1167
1168static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001169string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001170{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001171 char pchar;
1172 PyObject *v;
1173 if (i < 0 || i >= Py_SIZE(a)) {
1174 PyErr_SetString(PyExc_IndexError, "string index out of range");
1175 return NULL;
1176 }
1177 pchar = a->ob_sval[i];
1178 v = (PyObject *)characters[pchar & UCHAR_MAX];
1179 if (v == NULL)
1180 v = PyString_FromStringAndSize(&pchar, 1);
1181 else {
Christian Heimes44720832008-05-26 13:01:01 +00001182#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001183 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001185 Py_INCREF(v);
1186 }
1187 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001188}
1189
1190static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001191string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001192{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001193 int c;
1194 Py_ssize_t len_a, len_b;
1195 Py_ssize_t min_len;
1196 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001197
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001198 /* Make sure both arguments are strings. */
1199 if (!(PyString_Check(a) && PyString_Check(b))) {
1200 result = Py_NotImplemented;
1201 goto out;
1202 }
1203 if (a == b) {
1204 switch (op) {
1205 case Py_EQ:case Py_LE:case Py_GE:
1206 result = Py_True;
1207 goto out;
1208 case Py_NE:case Py_LT:case Py_GT:
1209 result = Py_False;
1210 goto out;
1211 }
1212 }
1213 if (op == Py_EQ) {
1214 /* Supporting Py_NE here as well does not save
1215 much time, since Py_NE is rarely used. */
1216 if (Py_SIZE(a) == Py_SIZE(b)
1217 && (a->ob_sval[0] == b->ob_sval[0]
1218 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1219 result = Py_True;
1220 } else {
1221 result = Py_False;
1222 }
1223 goto out;
1224 }
1225 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1226 min_len = (len_a < len_b) ? len_a : len_b;
1227 if (min_len > 0) {
1228 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1229 if (c==0)
1230 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1231 } else
1232 c = 0;
1233 if (c == 0)
1234 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1235 switch (op) {
1236 case Py_LT: c = c < 0; break;
1237 case Py_LE: c = c <= 0; break;
1238 case Py_EQ: assert(0); break; /* unreachable */
1239 case Py_NE: c = c != 0; break;
1240 case Py_GT: c = c > 0; break;
1241 case Py_GE: c = c >= 0; break;
1242 default:
1243 result = Py_NotImplemented;
1244 goto out;
1245 }
1246 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001247 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001248 Py_INCREF(result);
1249 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001250}
1251
1252int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001253_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001254{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001255 PyStringObject *a = (PyStringObject*) o1;
1256 PyStringObject *b = (PyStringObject*) o2;
1257 return Py_SIZE(a) == Py_SIZE(b)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001258 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001259}
1260
1261static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001262string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001263{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001264 register Py_ssize_t len;
1265 register unsigned char *p;
1266 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001267
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001268#ifdef Py_DEBUG
Benjamin Peterson26da9202012-02-21 11:08:50 -05001269 assert(_Py_HashSecret_Initialized);
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001270#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001271 if (a->ob_shash != -1)
1272 return a->ob_shash;
1273 len = Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001274 /*
1275 We make the hash of the empty string be 0, rather than using
1276 (prefix ^ suffix), since this slightly obfuscates the hash secret
1277 */
1278 if (len == 0) {
1279 a->ob_shash = 0;
1280 return 0;
1281 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001282 p = (unsigned char *) a->ob_sval;
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001283 x = _Py_HashSecret.prefix;
1284 x ^= *p << 7;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001285 while (--len >= 0)
1286 x = (1000003*x) ^ *p++;
1287 x ^= Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001288 x ^= _Py_HashSecret.suffix;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001289 if (x == -1)
1290 x = -2;
1291 a->ob_shash = x;
1292 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001293}
1294
1295static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001296string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001297{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001298 if (PyIndex_Check(item)) {
1299 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1300 if (i == -1 && PyErr_Occurred())
1301 return NULL;
1302 if (i < 0)
1303 i += PyString_GET_SIZE(self);
1304 return string_item(self, i);
1305 }
1306 else if (PySlice_Check(item)) {
1307 Py_ssize_t start, stop, step, slicelength, cur, i;
1308 char* source_buf;
1309 char* result_buf;
1310 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001311
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001312 if (PySlice_GetIndicesEx((PySliceObject*)item,
1313 PyString_GET_SIZE(self),
1314 &start, &stop, &step, &slicelength) < 0) {
1315 return NULL;
1316 }
Christian Heimes44720832008-05-26 13:01:01 +00001317
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001318 if (slicelength <= 0) {
1319 return PyString_FromStringAndSize("", 0);
1320 }
1321 else if (start == 0 && step == 1 &&
1322 slicelength == PyString_GET_SIZE(self) &&
1323 PyString_CheckExact(self)) {
1324 Py_INCREF(self);
1325 return (PyObject *)self;
1326 }
1327 else if (step == 1) {
1328 return PyString_FromStringAndSize(
1329 PyString_AS_STRING(self) + start,
1330 slicelength);
1331 }
1332 else {
1333 source_buf = PyString_AsString((PyObject*)self);
1334 result_buf = (char *)PyMem_Malloc(slicelength);
1335 if (result_buf == NULL)
1336 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001337
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001338 for (cur = start, i = 0; i < slicelength;
1339 cur += step, i++) {
1340 result_buf[i] = source_buf[cur];
1341 }
Christian Heimes44720832008-05-26 13:01:01 +00001342
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001343 result = PyString_FromStringAndSize(result_buf,
1344 slicelength);
1345 PyMem_Free(result_buf);
1346 return result;
1347 }
1348 }
1349 else {
1350 PyErr_Format(PyExc_TypeError,
1351 "string indices must be integers, not %.200s",
1352 Py_TYPE(item)->tp_name);
1353 return NULL;
1354 }
Christian Heimes44720832008-05-26 13:01:01 +00001355}
1356
1357static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001358string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001359{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001360 if ( index != 0 ) {
1361 PyErr_SetString(PyExc_SystemError,
1362 "accessing non-existent string segment");
1363 return -1;
1364 }
1365 *ptr = (void *)self->ob_sval;
1366 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001367}
1368
1369static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001370string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001371{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001372 PyErr_SetString(PyExc_TypeError,
1373 "Cannot use string as modifiable buffer");
1374 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001375}
1376
1377static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001378string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001379{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001380 if ( lenp )
1381 *lenp = Py_SIZE(self);
1382 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001383}
1384
1385static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001386string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001387{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001388 if ( index != 0 ) {
1389 PyErr_SetString(PyExc_SystemError,
1390 "accessing non-existent string segment");
1391 return -1;
1392 }
1393 *ptr = self->ob_sval;
1394 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001395}
1396
1397static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001398string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001399{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001400 return PyBuffer_FillInfo(view, (PyObject*)self,
1401 (void *)self->ob_sval, Py_SIZE(self),
1402 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001403}
1404
1405static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001406 (lenfunc)string_length, /*sq_length*/
1407 (binaryfunc)string_concat, /*sq_concat*/
1408 (ssizeargfunc)string_repeat, /*sq_repeat*/
1409 (ssizeargfunc)string_item, /*sq_item*/
1410 (ssizessizeargfunc)string_slice, /*sq_slice*/
1411 0, /*sq_ass_item*/
1412 0, /*sq_ass_slice*/
1413 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001414};
1415
1416static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001417 (lenfunc)string_length,
1418 (binaryfunc)string_subscript,
1419 0,
Christian Heimes44720832008-05-26 13:01:01 +00001420};
1421
1422static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001423 (readbufferproc)string_buffer_getreadbuf,
1424 (writebufferproc)string_buffer_getwritebuf,
1425 (segcountproc)string_buffer_getsegcount,
1426 (charbufferproc)string_buffer_getcharbuf,
1427 (getbufferproc)string_buffer_getbuffer,
1428 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001429};
1430
1431
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001432
/* Selectors for the strip variants; each value is an index into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* The bare method name: the format string with its 3-character "|O:"
   prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1441
Christian Heimes1a6387e2008-03-26 12:49:49 +00001442PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001443"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001444\n\
Christian Heimes44720832008-05-26 13:01:01 +00001445Return a list of the words in the string S, using sep as the\n\
1446delimiter string. If maxsplit is given, at most maxsplit\n\
1447splits are done. If sep is not specified or is None, any\n\
1448whitespace string is a separator and empty strings are removed\n\
1449from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001450
1451static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001452string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001453{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001454 Py_ssize_t len = PyString_GET_SIZE(self), n;
1455 Py_ssize_t maxsplit = -1;
1456 const char *s = PyString_AS_STRING(self), *sub;
1457 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001458
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001459 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1460 return NULL;
1461 if (maxsplit < 0)
1462 maxsplit = PY_SSIZE_T_MAX;
1463 if (subobj == Py_None)
1464 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1465 if (PyString_Check(subobj)) {
1466 sub = PyString_AS_STRING(subobj);
1467 n = PyString_GET_SIZE(subobj);
1468 }
Christian Heimes44720832008-05-26 13:01:01 +00001469#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001470 else if (PyUnicode_Check(subobj))
1471 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001472#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001473 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1474 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001475
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001476 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001477}
1478
1479PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001480"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001481\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001482Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001483the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001484found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001485
1486static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001487string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001488{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001489 const char *sep;
1490 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001491
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001492 if (PyString_Check(sep_obj)) {
1493 sep = PyString_AS_STRING(sep_obj);
1494 sep_len = PyString_GET_SIZE(sep_obj);
1495 }
Christian Heimes44720832008-05-26 13:01:01 +00001496#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001497 else if (PyUnicode_Check(sep_obj))
1498 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001499#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001500 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1501 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001502
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001503 return stringlib_partition(
1504 (PyObject*) self,
1505 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1506 sep_obj, sep, sep_len
1507 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001508}
1509
1510PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001511"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001512\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001513Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001514the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001515separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001516
1517static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001518string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001519{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001520 const char *sep;
1521 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001522
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001523 if (PyString_Check(sep_obj)) {
1524 sep = PyString_AS_STRING(sep_obj);
1525 sep_len = PyString_GET_SIZE(sep_obj);
1526 }
Christian Heimes44720832008-05-26 13:01:01 +00001527#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001528 else if (PyUnicode_Check(sep_obj))
1529 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001530#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001531 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1532 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001533
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001534 return stringlib_rpartition(
1535 (PyObject*) self,
1536 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1537 sep_obj, sep, sep_len
1538 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001539}
1540
Christian Heimes1a6387e2008-03-26 12:49:49 +00001541PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001542"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001543\n\
Christian Heimes44720832008-05-26 13:01:01 +00001544Return a list of the words in the string S, using sep as the\n\
1545delimiter string, starting at the end of the string and working\n\
1546to the front. If maxsplit is given, at most maxsplit splits are\n\
1547done. If sep is not specified or is None, any whitespace string\n\
1548is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001549
1550static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001551string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001552{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001553 Py_ssize_t len = PyString_GET_SIZE(self), n;
1554 Py_ssize_t maxsplit = -1;
1555 const char *s = PyString_AS_STRING(self), *sub;
1556 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001557
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001558 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1559 return NULL;
1560 if (maxsplit < 0)
1561 maxsplit = PY_SSIZE_T_MAX;
1562 if (subobj == Py_None)
1563 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1564 if (PyString_Check(subobj)) {
1565 sub = PyString_AS_STRING(subobj);
1566 n = PyString_GET_SIZE(subobj);
1567 }
Christian Heimes44720832008-05-26 13:01:01 +00001568#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001569 else if (PyUnicode_Check(subobj))
1570 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001571#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001572 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001574
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001575 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001576}
1577
1578
1579PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001580"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001581\n\
1582Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001583iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001584
1585static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001586string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001587{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001588 char *sep = PyString_AS_STRING(self);
1589 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1590 PyObject *res = NULL;
1591 char *p;
1592 Py_ssize_t seqlen = 0;
1593 size_t sz = 0;
1594 Py_ssize_t i;
1595 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001596
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001597 seq = PySequence_Fast(orig, "");
1598 if (seq == NULL) {
1599 return NULL;
1600 }
Christian Heimes44720832008-05-26 13:01:01 +00001601
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001602 seqlen = PySequence_Size(seq);
1603 if (seqlen == 0) {
1604 Py_DECREF(seq);
1605 return PyString_FromString("");
1606 }
1607 if (seqlen == 1) {
1608 item = PySequence_Fast_GET_ITEM(seq, 0);
1609 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1610 Py_INCREF(item);
1611 Py_DECREF(seq);
1612 return item;
1613 }
1614 }
Christian Heimes44720832008-05-26 13:01:01 +00001615
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001616 /* There are at least two things to join, or else we have a subclass
1617 * of the builtin types in the sequence.
1618 * Do a pre-pass to figure out the total amount of space we'll
1619 * need (sz), see whether any argument is absurd, and defer to
1620 * the Unicode join if appropriate.
1621 */
1622 for (i = 0; i < seqlen; i++) {
1623 const size_t old_sz = sz;
1624 item = PySequence_Fast_GET_ITEM(seq, i);
1625 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001626#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001627 if (PyUnicode_Check(item)) {
1628 /* Defer to Unicode join.
1629 * CAUTION: There's no gurantee that the
1630 * original sequence can be iterated over
1631 * again, so we must pass seq here.
1632 */
1633 PyObject *result;
1634 result = PyUnicode_Join((PyObject *)self, seq);
1635 Py_DECREF(seq);
1636 return result;
1637 }
Christian Heimes44720832008-05-26 13:01:01 +00001638#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001639 PyErr_Format(PyExc_TypeError,
1640 "sequence item %zd: expected string,"
1641 " %.80s found",
1642 i, Py_TYPE(item)->tp_name);
1643 Py_DECREF(seq);
1644 return NULL;
1645 }
1646 sz += PyString_GET_SIZE(item);
1647 if (i != 0)
1648 sz += seplen;
1649 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1650 PyErr_SetString(PyExc_OverflowError,
1651 "join() result is too long for a Python string");
1652 Py_DECREF(seq);
1653 return NULL;
1654 }
1655 }
Christian Heimes44720832008-05-26 13:01:01 +00001656
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001657 /* Allocate result space. */
1658 res = PyString_FromStringAndSize((char*)NULL, sz);
1659 if (res == NULL) {
1660 Py_DECREF(seq);
1661 return NULL;
1662 }
Christian Heimes44720832008-05-26 13:01:01 +00001663
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001664 /* Catenate everything. */
1665 p = PyString_AS_STRING(res);
1666 for (i = 0; i < seqlen; ++i) {
1667 size_t n;
1668 item = PySequence_Fast_GET_ITEM(seq, i);
1669 n = PyString_GET_SIZE(item);
1670 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1671 p += n;
1672 if (i < seqlen - 1) {
1673 Py_MEMCPY(p, sep, seplen);
1674 p += seplen;
1675 }
1676 }
Christian Heimes44720832008-05-26 13:01:01 +00001677
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001678 Py_DECREF(seq);
1679 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001680}
1681
1682PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001683_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001684{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001685 assert(sep != NULL && PyString_Check(sep));
1686 assert(x != NULL);
1687 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001688}
1689
/* Helper macro to fix up start/end slice values in place.
 *
 *   end   > len  -> len
 *   end   < 0    -> end + len, but never below 0
 *   start < 0    -> start + len, but never below 0
 *
 * start is NOT clamped to len, so start may exceed end afterwards.
 * start and end must be modifiable lvalues; len is evaluated more than
 * once.  The body is wrapped in do { } while (0) so the macro behaves as
 * a single statement (e.g. inside an unbraced if/else); invoke it with a
 * trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if (end > (len))                        \
            end = (len);                        \
        else if (end < 0) {                     \
            end += (len);                       \
            if (end < 0)                        \
                end = 0;                        \
        }                                       \
        if (start < 0) {                        \
            start += (len);                     \
            if (start < 0)                      \
                start = 0;                      \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001704
1705Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001706string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001707{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001708 PyObject *subobj;
1709 const char *sub;
1710 Py_ssize_t sub_len;
1711 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00001712
Jesus Cea44e81682011-04-20 16:39:15 +02001713 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1714 args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001715 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001716
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001717 if (PyString_Check(subobj)) {
1718 sub = PyString_AS_STRING(subobj);
1719 sub_len = PyString_GET_SIZE(subobj);
1720 }
Christian Heimes44720832008-05-26 13:01:01 +00001721#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001722 else if (PyUnicode_Check(subobj))
1723 return PyUnicode_Find(
1724 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001725#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001726 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1727 /* XXX - the "expected a character buffer object" is pretty
1728 confusing for a non-expert. remap to something else ? */
1729 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001730
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001731 if (dir > 0)
1732 return stringlib_find_slice(
1733 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1734 sub, sub_len, start, end);
1735 else
1736 return stringlib_rfind_slice(
1737 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1738 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001739}
1740
1741
1742PyDoc_STRVAR(find__doc__,
1743"S.find(sub [,start [,end]]) -> int\n\
1744\n\
1745Return the lowest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001746such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001747arguments start and end are interpreted as in slice notation.\n\
1748\n\
1749Return -1 on failure.");
1750
1751static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001752string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001753{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001754 Py_ssize_t result = string_find_internal(self, args, +1);
1755 if (result == -2)
1756 return NULL;
1757 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001758}
1759
1760
1761PyDoc_STRVAR(index__doc__,
1762"S.index(sub [,start [,end]]) -> int\n\
1763\n\
1764Like S.find() but raise ValueError when the substring is not found.");
1765
1766static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001767string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001768{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001769 Py_ssize_t result = string_find_internal(self, args, +1);
1770 if (result == -2)
1771 return NULL;
1772 if (result == -1) {
1773 PyErr_SetString(PyExc_ValueError,
1774 "substring not found");
1775 return NULL;
1776 }
1777 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001778}
1779
1780
1781PyDoc_STRVAR(rfind__doc__,
1782"S.rfind(sub [,start [,end]]) -> int\n\
1783\n\
1784Return the highest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001785such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001786arguments start and end are interpreted as in slice notation.\n\
1787\n\
1788Return -1 on failure.");
1789
1790static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001791string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001792{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001793 Py_ssize_t result = string_find_internal(self, args, -1);
1794 if (result == -2)
1795 return NULL;
1796 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001797}
1798
1799
1800PyDoc_STRVAR(rindex__doc__,
1801"S.rindex(sub [,start [,end]]) -> int\n\
1802\n\
1803Like S.rfind() but raise ValueError when the substring is not found.");
1804
1805static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001806string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001807{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001808 Py_ssize_t result = string_find_internal(self, args, -1);
1809 if (result == -2)
1810 return NULL;
1811 if (result == -1) {
1812 PyErr_SetString(PyExc_ValueError,
1813 "substring not found");
1814 return NULL;
1815 }
1816 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001817}
1818
1819
1820Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001821do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001822{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001823 char *s = PyString_AS_STRING(self);
1824 Py_ssize_t len = PyString_GET_SIZE(self);
1825 char *sep = PyString_AS_STRING(sepobj);
1826 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1827 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001828
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001829 i = 0;
1830 if (striptype != RIGHTSTRIP) {
1831 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1832 i++;
1833 }
1834 }
Christian Heimes44720832008-05-26 13:01:01 +00001835
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001836 j = len;
1837 if (striptype != LEFTSTRIP) {
1838 do {
1839 j--;
1840 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1841 j++;
1842 }
Christian Heimes44720832008-05-26 13:01:01 +00001843
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001844 if (i == 0 && j == len && PyString_CheckExact(self)) {
1845 Py_INCREF(self);
1846 return (PyObject*)self;
1847 }
1848 else
1849 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001850}
1851
1852
1853Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001854do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001855{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001856 char *s = PyString_AS_STRING(self);
1857 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001858
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001859 i = 0;
1860 if (striptype != RIGHTSTRIP) {
1861 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1862 i++;
1863 }
1864 }
Christian Heimes44720832008-05-26 13:01:01 +00001865
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001866 j = len;
1867 if (striptype != LEFTSTRIP) {
1868 do {
1869 j--;
1870 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1871 j++;
1872 }
Christian Heimes44720832008-05-26 13:01:01 +00001873
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001874 if (i == 0 && j == len && PyString_CheckExact(self)) {
1875 Py_INCREF(self);
1876 return (PyObject*)self;
1877 }
1878 else
1879 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001880}
1881
1882
1883Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001884do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001885{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001886 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001887
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001888 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1889 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001890
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001891 if (sep != NULL && sep != Py_None) {
1892 if (PyString_Check(sep))
1893 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001894#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001895 else if (PyUnicode_Check(sep)) {
1896 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1897 PyObject *res;
1898 if (uniself==NULL)
1899 return NULL;
1900 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1901 striptype, sep);
1902 Py_DECREF(uniself);
1903 return res;
1904 }
Christian Heimes44720832008-05-26 13:01:01 +00001905#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001906 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001907#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001908 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001909#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001910 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001911#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001912 STRIPNAME(striptype));
1913 return NULL;
1914 }
Christian Heimes44720832008-05-26 13:01:01 +00001915
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001916 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001917}
1918
1919
1920PyDoc_STRVAR(strip__doc__,
1921"S.strip([chars]) -> string or unicode\n\
1922\n\
1923Return a copy of the string S with leading and trailing\n\
1924whitespace removed.\n\
1925If chars is given and not None, remove characters in chars instead.\n\
1926If chars is unicode, S will be converted to unicode before stripping");
1927
1928static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001929string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001930{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001931 if (PyTuple_GET_SIZE(args) == 0)
1932 return do_strip(self, BOTHSTRIP); /* Common case */
1933 else
1934 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001935}
1936
1937
1938PyDoc_STRVAR(lstrip__doc__,
1939"S.lstrip([chars]) -> string or unicode\n\
1940\n\
1941Return a copy of the string S with leading whitespace removed.\n\
1942If chars is given and not None, remove characters in chars instead.\n\
1943If chars is unicode, S will be converted to unicode before stripping");
1944
1945static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001946string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001947{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001948 if (PyTuple_GET_SIZE(args) == 0)
1949 return do_strip(self, LEFTSTRIP); /* Common case */
1950 else
1951 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001952}
1953
1954
1955PyDoc_STRVAR(rstrip__doc__,
1956"S.rstrip([chars]) -> string or unicode\n\
1957\n\
1958Return a copy of the string S with trailing whitespace removed.\n\
1959If chars is given and not None, remove characters in chars instead.\n\
1960If chars is unicode, S will be converted to unicode before stripping");
1961
1962static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001963string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001964{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001965 if (PyTuple_GET_SIZE(args) == 0)
1966 return do_strip(self, RIGHTSTRIP); /* Common case */
1967 else
1968 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001969}
1970
1971
1972PyDoc_STRVAR(lower__doc__,
1973"S.lower() -> string\n\
1974\n\
1975Return a copy of the string S converted to lowercase.");
1976
1977/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1978#ifndef _tolower
1979#define _tolower tolower
1980#endif
1981
1982static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001983string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001984{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001985 char *s;
1986 Py_ssize_t i, n = PyString_GET_SIZE(self);
1987 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001988
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001989 newobj = PyString_FromStringAndSize(NULL, n);
1990 if (!newobj)
1991 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001992
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001993 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001994
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001995 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001996
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001997 for (i = 0; i < n; i++) {
1998 int c = Py_CHARMASK(s[i]);
1999 if (isupper(c))
2000 s[i] = _tolower(c);
2001 }
Christian Heimes44720832008-05-26 13:01:01 +00002002
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002003 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002004}
2005
2006PyDoc_STRVAR(upper__doc__,
2007"S.upper() -> string\n\
2008\n\
2009Return a copy of the string S converted to uppercase.");
2010
2011#ifndef _toupper
2012#define _toupper toupper
2013#endif
2014
2015static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002016string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002017{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002018 char *s;
2019 Py_ssize_t i, n = PyString_GET_SIZE(self);
2020 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002021
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002022 newobj = PyString_FromStringAndSize(NULL, n);
2023 if (!newobj)
2024 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002025
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002026 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002027
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002028 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002029
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002030 for (i = 0; i < n; i++) {
2031 int c = Py_CHARMASK(s[i]);
2032 if (islower(c))
2033 s[i] = _toupper(c);
2034 }
Christian Heimes44720832008-05-26 13:01:01 +00002035
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002036 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002037}
2038
2039PyDoc_STRVAR(title__doc__,
2040"S.title() -> string\n\
2041\n\
2042Return a titlecased version of S, i.e. words start with uppercase\n\
2043characters, all remaining cased characters have lowercase.");
2044
2045static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002046string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002047{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002048 char *s = PyString_AS_STRING(self), *s_new;
2049 Py_ssize_t i, n = PyString_GET_SIZE(self);
2050 int previous_is_cased = 0;
2051 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002052
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002053 newobj = PyString_FromStringAndSize(NULL, n);
2054 if (newobj == NULL)
2055 return NULL;
2056 s_new = PyString_AsString(newobj);
2057 for (i = 0; i < n; i++) {
2058 int c = Py_CHARMASK(*s++);
2059 if (islower(c)) {
2060 if (!previous_is_cased)
2061 c = toupper(c);
2062 previous_is_cased = 1;
2063 } else if (isupper(c)) {
2064 if (previous_is_cased)
2065 c = tolower(c);
2066 previous_is_cased = 1;
2067 } else
2068 previous_is_cased = 0;
2069 *s_new++ = c;
2070 }
2071 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002072}
2073
2074PyDoc_STRVAR(capitalize__doc__,
2075"S.capitalize() -> string\n\
2076\n\
2077Return a copy of the string S with only its first character\n\
2078capitalized.");
2079
2080static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002081string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002082{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002083 char *s = PyString_AS_STRING(self), *s_new;
2084 Py_ssize_t i, n = PyString_GET_SIZE(self);
2085 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002086
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002087 newobj = PyString_FromStringAndSize(NULL, n);
2088 if (newobj == NULL)
2089 return NULL;
2090 s_new = PyString_AsString(newobj);
2091 if (0 < n) {
2092 int c = Py_CHARMASK(*s++);
2093 if (islower(c))
2094 *s_new = toupper(c);
2095 else
2096 *s_new = c;
2097 s_new++;
2098 }
2099 for (i = 1; i < n; i++) {
2100 int c = Py_CHARMASK(*s++);
2101 if (isupper(c))
2102 *s_new = tolower(c);
2103 else
2104 *s_new = c;
2105 s_new++;
2106 }
2107 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002108}
2109
2110
2111PyDoc_STRVAR(count__doc__,
2112"S.count(sub[, start[, end]]) -> int\n\
2113\n\
2114Return the number of non-overlapping occurrences of substring sub in\n\
2115string S[start:end]. Optional arguments start and end are interpreted\n\
2116as in slice notation.");
2117
2118static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002119string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002120{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002121 PyObject *sub_obj;
2122 const char *str = PyString_AS_STRING(self), *sub;
2123 Py_ssize_t sub_len;
2124 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002125
Jesus Cea44e81682011-04-20 16:39:15 +02002126 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002127 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002128
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002129 if (PyString_Check(sub_obj)) {
2130 sub = PyString_AS_STRING(sub_obj);
2131 sub_len = PyString_GET_SIZE(sub_obj);
2132 }
Christian Heimes44720832008-05-26 13:01:01 +00002133#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002134 else if (PyUnicode_Check(sub_obj)) {
2135 Py_ssize_t count;
2136 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2137 if (count == -1)
2138 return NULL;
2139 else
2140 return PyInt_FromSsize_t(count);
2141 }
Christian Heimes44720832008-05-26 13:01:01 +00002142#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002143 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2144 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002145
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002146 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002147
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002148 return PyInt_FromSsize_t(
2149 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2150 );
Christian Heimes44720832008-05-26 13:01:01 +00002151}
2152
2153PyDoc_STRVAR(swapcase__doc__,
2154"S.swapcase() -> string\n\
2155\n\
2156Return a copy of the string S with uppercase characters\n\
2157converted to lowercase and vice versa.");
2158
2159static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002160string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002161{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002162 char *s = PyString_AS_STRING(self), *s_new;
2163 Py_ssize_t i, n = PyString_GET_SIZE(self);
2164 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002165
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002166 newobj = PyString_FromStringAndSize(NULL, n);
2167 if (newobj == NULL)
2168 return NULL;
2169 s_new = PyString_AsString(newobj);
2170 for (i = 0; i < n; i++) {
2171 int c = Py_CHARMASK(*s++);
2172 if (islower(c)) {
2173 *s_new = toupper(c);
2174 }
2175 else if (isupper(c)) {
2176 *s_new = tolower(c);
2177 }
2178 else
2179 *s_new = c;
2180 s_new++;
2181 }
2182 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002183}
2184
2185
2186PyDoc_STRVAR(translate__doc__,
2187"S.translate(table [,deletechars]) -> string\n\
2188\n\
2189Return a copy of the string S, where all characters occurring\n\
2190in the optional argument deletechars are removed, and the\n\
2191remaining characters have been mapped through the given\n\
Mark Dickinsoncb9bf1a2011-06-25 11:00:12 +02002192translation table, which must be a string of length 256 or None.\n\
2193If the table argument is None, no translation is applied and\n\
2194the operation simply removes the characters in deletechars.");
Christian Heimes44720832008-05-26 13:01:01 +00002195
2196static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002197string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002198{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002199 register char *input, *output;
2200 const char *table;
2201 register Py_ssize_t i, c, changed = 0;
2202 PyObject *input_obj = (PyObject*)self;
2203 const char *output_start, *del_table=NULL;
2204 Py_ssize_t inlen, tablen, dellen = 0;
2205 PyObject *result;
2206 int trans_table[256];
2207 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002208
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002209 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2210 &tableobj, &delobj))
2211 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002212
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002213 if (PyString_Check(tableobj)) {
2214 table = PyString_AS_STRING(tableobj);
2215 tablen = PyString_GET_SIZE(tableobj);
2216 }
2217 else if (tableobj == Py_None) {
2218 table = NULL;
2219 tablen = 256;
2220 }
Christian Heimes44720832008-05-26 13:01:01 +00002221#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002222 else if (PyUnicode_Check(tableobj)) {
2223 /* Unicode .translate() does not support the deletechars
2224 parameter; instead a mapping to None will cause characters
2225 to be deleted. */
2226 if (delobj != NULL) {
2227 PyErr_SetString(PyExc_TypeError,
2228 "deletions are implemented differently for unicode");
2229 return NULL;
2230 }
2231 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2232 }
Christian Heimes44720832008-05-26 13:01:01 +00002233#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002234 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2235 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002236
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002237 if (tablen != 256) {
2238 PyErr_SetString(PyExc_ValueError,
2239 "translation table must be 256 characters long");
2240 return NULL;
2241 }
Christian Heimes44720832008-05-26 13:01:01 +00002242
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002243 if (delobj != NULL) {
2244 if (PyString_Check(delobj)) {
2245 del_table = PyString_AS_STRING(delobj);
2246 dellen = PyString_GET_SIZE(delobj);
2247 }
Christian Heimes44720832008-05-26 13:01:01 +00002248#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002249 else if (PyUnicode_Check(delobj)) {
2250 PyErr_SetString(PyExc_TypeError,
2251 "deletions are implemented differently for unicode");
2252 return NULL;
2253 }
Christian Heimes44720832008-05-26 13:01:01 +00002254#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002255 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2256 return NULL;
2257 }
2258 else {
2259 del_table = NULL;
2260 dellen = 0;
2261 }
Christian Heimes44720832008-05-26 13:01:01 +00002262
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002263 inlen = PyString_GET_SIZE(input_obj);
2264 result = PyString_FromStringAndSize((char *)NULL, inlen);
2265 if (result == NULL)
2266 return NULL;
2267 output_start = output = PyString_AsString(result);
2268 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002269
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002270 if (dellen == 0 && table != NULL) {
2271 /* If no deletions are required, use faster code */
2272 for (i = inlen; --i >= 0; ) {
2273 c = Py_CHARMASK(*input++);
2274 if (Py_CHARMASK((*output++ = table[c])) != c)
2275 changed = 1;
2276 }
2277 if (changed || !PyString_CheckExact(input_obj))
2278 return result;
2279 Py_DECREF(result);
2280 Py_INCREF(input_obj);
2281 return input_obj;
2282 }
Christian Heimes44720832008-05-26 13:01:01 +00002283
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002284 if (table == NULL) {
2285 for (i = 0; i < 256; i++)
2286 trans_table[i] = Py_CHARMASK(i);
2287 } else {
2288 for (i = 0; i < 256; i++)
2289 trans_table[i] = Py_CHARMASK(table[i]);
2290 }
Christian Heimes44720832008-05-26 13:01:01 +00002291
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002292 for (i = 0; i < dellen; i++)
2293 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002294
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002295 for (i = inlen; --i >= 0; ) {
2296 c = Py_CHARMASK(*input++);
2297 if (trans_table[c] != -1)
2298 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2299 continue;
2300 changed = 1;
2301 }
2302 if (!changed && PyString_CheckExact(input_obj)) {
2303 Py_DECREF(result);
2304 Py_INCREF(input_obj);
2305 return input_obj;
2306 }
2307 /* Fix the size of the resulting string */
2308 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2309 return NULL;
2310 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002311}
2312
2313
/* find and count characters and substrings */

/* findchar(target, target_len, c): pointer to the first byte equal to c
 * within the first target_len bytes of target, or NULL if there is none
 * (thin wrapper around memchr()).  Every argument is parenthesized so
 * that arbitrary expressions can be passed safely.
 */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2318
2319/* String ops must return a string. */
2320/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002321Py_LOCAL(PyStringObject *)
2322return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002323{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002324 if (PyString_CheckExact(self)) {
2325 Py_INCREF(self);
2326 return self;
2327 }
2328 return (PyStringObject *)PyString_FromStringAndSize(
2329 PyString_AS_STRING(self),
2330 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002331}
2332
2333Py_LOCAL_INLINE(Py_ssize_t)
Ronald Oussoren3687e802013-07-11 13:33:55 +02002334countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002335{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002336 Py_ssize_t count=0;
2337 const char *start=target;
2338 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002339
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002340 while ( (start=findchar(start, end-start, c)) != NULL ) {
2341 count++;
2342 if (count >= maxcount)
2343 break;
2344 start += 1;
2345 }
2346 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002347}
2348
Christian Heimes44720832008-05-26 13:01:01 +00002349
2350/* Algorithms for different cases of string replacement */
2351
2352/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002353Py_LOCAL(PyStringObject *)
2354replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002355 const char *to_s, Py_ssize_t to_len,
2356 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002357{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002358 char *self_s, *result_s;
2359 Py_ssize_t self_len, result_len;
2360 Py_ssize_t count, i, product;
2361 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002362
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002363 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002364
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002365 /* 1 at the end plus 1 after every character */
2366 count = self_len+1;
2367 if (maxcount < count)
2368 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002369
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002370 /* Check for overflow */
2371 /* result_len = count * to_len + self_len; */
2372 product = count * to_len;
2373 if (product / to_len != count) {
2374 PyErr_SetString(PyExc_OverflowError,
2375 "replace string is too long");
2376 return NULL;
2377 }
2378 result_len = product + self_len;
2379 if (result_len < 0) {
2380 PyErr_SetString(PyExc_OverflowError,
2381 "replace string is too long");
2382 return NULL;
2383 }
Christian Heimes44720832008-05-26 13:01:01 +00002384
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002385 if (! (result = (PyStringObject *)
2386 PyString_FromStringAndSize(NULL, result_len)) )
2387 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002388
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002389 self_s = PyString_AS_STRING(self);
2390 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002391
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002392 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002393
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002394 /* Lay the first one down (guaranteed this will occur) */
2395 Py_MEMCPY(result_s, to_s, to_len);
2396 result_s += to_len;
2397 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002398
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002399 for (i=0; i<count; i++) {
2400 *result_s++ = *self_s++;
2401 Py_MEMCPY(result_s, to_s, to_len);
2402 result_s += to_len;
2403 }
2404
2405 /* Copy the rest of the original string */
2406 Py_MEMCPY(result_s, self_s, self_len-i);
2407
2408 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002409}
2410
2411/* Special case for deleting a single character */
2412/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002413Py_LOCAL(PyStringObject *)
2414replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002415 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002416{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002417 char *self_s, *result_s;
2418 char *start, *next, *end;
2419 Py_ssize_t self_len, result_len;
2420 Py_ssize_t count;
2421 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002422
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002423 self_len = PyString_GET_SIZE(self);
2424 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002425
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002426 count = countchar(self_s, self_len, from_c, maxcount);
2427 if (count == 0) {
2428 return return_self(self);
2429 }
Christian Heimes44720832008-05-26 13:01:01 +00002430
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002431 result_len = self_len - count; /* from_len == 1 */
2432 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002433
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002434 if ( (result = (PyStringObject *)
2435 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2436 return NULL;
2437 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002438
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002439 start = self_s;
2440 end = self_s + self_len;
2441 while (count-- > 0) {
2442 next = findchar(start, end-start, from_c);
2443 if (next == NULL)
2444 break;
2445 Py_MEMCPY(result_s, start, next-start);
2446 result_s += (next-start);
2447 start = next+1;
2448 }
2449 Py_MEMCPY(result_s, start, end-start);
2450
2451 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002452}
2453
2454/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2455
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002456Py_LOCAL(PyStringObject *)
2457replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002458 const char *from_s, Py_ssize_t from_len,
2459 Py_ssize_t maxcount) {
2460 char *self_s, *result_s;
2461 char *start, *next, *end;
2462 Py_ssize_t self_len, result_len;
2463 Py_ssize_t count, offset;
2464 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002465
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002466 self_len = PyString_GET_SIZE(self);
2467 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002468
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002469 count = stringlib_count(self_s, self_len,
2470 from_s, from_len,
2471 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002472
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002473 if (count == 0) {
2474 /* no matches */
2475 return return_self(self);
2476 }
Christian Heimes44720832008-05-26 13:01:01 +00002477
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002478 result_len = self_len - (count * from_len);
2479 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002480
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002481 if ( (result = (PyStringObject *)
2482 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2483 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002484
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002485 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002486
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002487 start = self_s;
2488 end = self_s + self_len;
2489 while (count-- > 0) {
2490 offset = stringlib_find(start, end-start,
2491 from_s, from_len,
2492 0);
2493 if (offset == -1)
2494 break;
2495 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002496
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002497 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002498
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002499 result_s += (next-start);
2500 start = next+from_len;
2501 }
2502 Py_MEMCPY(result_s, start, end-start);
2503 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002504}
2505
2506/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002507Py_LOCAL(PyStringObject *)
2508replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002509 char from_c, char to_c,
2510 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002511{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002512 char *self_s, *result_s, *start, *end, *next;
2513 Py_ssize_t self_len;
2514 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002515
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002516 /* The result string will be the same size */
2517 self_s = PyString_AS_STRING(self);
2518 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002519
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002520 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002521
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002522 if (next == NULL) {
2523 /* No matches; return the original string */
2524 return return_self(self);
2525 }
Christian Heimes44720832008-05-26 13:01:01 +00002526
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002527 /* Need to make a new string */
2528 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2529 if (result == NULL)
2530 return NULL;
2531 result_s = PyString_AS_STRING(result);
2532 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002533
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002534 /* change everything in-place, starting with this one */
2535 start = result_s + (next-self_s);
2536 *start = to_c;
2537 start++;
2538 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002539
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002540 while (--maxcount > 0) {
2541 next = findchar(start, end-start, from_c);
2542 if (next == NULL)
2543 break;
2544 *next = to_c;
2545 start = next+1;
2546 }
Christian Heimes44720832008-05-26 13:01:01 +00002547
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002548 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002549}
2550
2551/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002552Py_LOCAL(PyStringObject *)
2553replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002554 const char *from_s, Py_ssize_t from_len,
2555 const char *to_s, Py_ssize_t to_len,
2556 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002557{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002558 char *result_s, *start, *end;
2559 char *self_s;
2560 Py_ssize_t self_len, offset;
2561 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002562
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002563 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002564
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002565 self_s = PyString_AS_STRING(self);
2566 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002567
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002568 offset = stringlib_find(self_s, self_len,
2569 from_s, from_len,
2570 0);
2571 if (offset == -1) {
2572 /* No matches; return the original string */
2573 return return_self(self);
2574 }
Christian Heimes44720832008-05-26 13:01:01 +00002575
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002576 /* Need to make a new string */
2577 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2578 if (result == NULL)
2579 return NULL;
2580 result_s = PyString_AS_STRING(result);
2581 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002582
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002583 /* change everything in-place, starting with this one */
2584 start = result_s + offset;
2585 Py_MEMCPY(start, to_s, from_len);
2586 start += from_len;
2587 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002588
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002589 while ( --maxcount > 0) {
2590 offset = stringlib_find(start, end-start,
2591 from_s, from_len,
2592 0);
2593 if (offset==-1)
2594 break;
2595 Py_MEMCPY(start+offset, to_s, from_len);
2596 start += offset+from_len;
2597 }
Christian Heimes44720832008-05-26 13:01:01 +00002598
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002599 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002600}
2601
2602/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002603Py_LOCAL(PyStringObject *)
2604replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002605 char from_c,
2606 const char *to_s, Py_ssize_t to_len,
2607 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002608{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002609 char *self_s, *result_s;
2610 char *start, *next, *end;
2611 Py_ssize_t self_len, result_len;
2612 Py_ssize_t count, product;
2613 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002614
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002615 self_s = PyString_AS_STRING(self);
2616 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002617
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002618 count = countchar(self_s, self_len, from_c, maxcount);
2619 if (count == 0) {
2620 /* no matches, return unchanged */
2621 return return_self(self);
2622 }
Christian Heimes44720832008-05-26 13:01:01 +00002623
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002624 /* use the difference between current and new, hence the "-1" */
2625 /* result_len = self_len + count * (to_len-1) */
2626 product = count * (to_len-1);
2627 if (product / (to_len-1) != count) {
2628 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2629 return NULL;
2630 }
2631 result_len = self_len + product;
2632 if (result_len < 0) {
2633 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2634 return NULL;
2635 }
Christian Heimes44720832008-05-26 13:01:01 +00002636
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002637 if ( (result = (PyStringObject *)
2638 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2639 return NULL;
2640 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002641
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002642 start = self_s;
2643 end = self_s + self_len;
2644 while (count-- > 0) {
2645 next = findchar(start, end-start, from_c);
2646 if (next == NULL)
2647 break;
Christian Heimes44720832008-05-26 13:01:01 +00002648
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002649 if (next == start) {
2650 /* replace with the 'to' */
2651 Py_MEMCPY(result_s, to_s, to_len);
2652 result_s += to_len;
2653 start += 1;
2654 } else {
2655 /* copy the unchanged old then the 'to' */
2656 Py_MEMCPY(result_s, start, next-start);
2657 result_s += (next-start);
2658 Py_MEMCPY(result_s, to_s, to_len);
2659 result_s += to_len;
2660 start = next+1;
2661 }
2662 }
2663 /* Copy the remainder of the remaining string */
2664 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002665
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002666 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002667}
2668
2669/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002670Py_LOCAL(PyStringObject *)
2671replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002672 const char *from_s, Py_ssize_t from_len,
2673 const char *to_s, Py_ssize_t to_len,
2674 Py_ssize_t maxcount) {
2675 char *self_s, *result_s;
2676 char *start, *next, *end;
2677 Py_ssize_t self_len, result_len;
2678 Py_ssize_t count, offset, product;
2679 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002680
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002681 self_s = PyString_AS_STRING(self);
2682 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002683
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002684 count = stringlib_count(self_s, self_len,
2685 from_s, from_len,
2686 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002687
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002688 if (count == 0) {
2689 /* no matches, return unchanged */
2690 return return_self(self);
2691 }
Christian Heimes44720832008-05-26 13:01:01 +00002692
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002693 /* Check for overflow */
2694 /* result_len = self_len + count * (to_len-from_len) */
2695 product = count * (to_len-from_len);
2696 if (product / (to_len-from_len) != count) {
2697 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2698 return NULL;
2699 }
2700 result_len = self_len + product;
2701 if (result_len < 0) {
2702 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2703 return NULL;
2704 }
Christian Heimes44720832008-05-26 13:01:01 +00002705
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002706 if ( (result = (PyStringObject *)
2707 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2708 return NULL;
2709 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002710
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002711 start = self_s;
2712 end = self_s + self_len;
2713 while (count-- > 0) {
2714 offset = stringlib_find(start, end-start,
2715 from_s, from_len,
2716 0);
2717 if (offset == -1)
2718 break;
2719 next = start+offset;
2720 if (next == start) {
2721 /* replace with the 'to' */
2722 Py_MEMCPY(result_s, to_s, to_len);
2723 result_s += to_len;
2724 start += from_len;
2725 } else {
2726 /* copy the unchanged old then the 'to' */
2727 Py_MEMCPY(result_s, start, next-start);
2728 result_s += (next-start);
2729 Py_MEMCPY(result_s, to_s, to_len);
2730 result_s += to_len;
2731 start = next+from_len;
2732 }
2733 }
2734 /* Copy the remainder of the remaining string */
2735 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002736
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002737 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002738}
2739
2740
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002741Py_LOCAL(PyStringObject *)
2742replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002743 const char *from_s, Py_ssize_t from_len,
2744 const char *to_s, Py_ssize_t to_len,
2745 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002746{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002747 if (maxcount < 0) {
2748 maxcount = PY_SSIZE_T_MAX;
2749 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2750 /* nothing to do; return the original string */
2751 return return_self(self);
2752 }
Christian Heimes44720832008-05-26 13:01:01 +00002753
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002754 if (maxcount == 0 ||
2755 (from_len == 0 && to_len == 0)) {
2756 /* nothing to do; return the original string */
2757 return return_self(self);
2758 }
Christian Heimes44720832008-05-26 13:01:01 +00002759
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002760 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002761
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002762 if (from_len == 0) {
2763 /* insert the 'to' string everywhere. */
2764 /* >>> "Python".replace("", ".") */
2765 /* '.P.y.t.h.o.n.' */
2766 return replace_interleave(self, to_s, to_len, maxcount);
2767 }
Christian Heimes44720832008-05-26 13:01:01 +00002768
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002769 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2770 /* point for an empty self string to generate a non-empty string */
2771 /* Special case so the remaining code always gets a non-empty string */
2772 if (PyString_GET_SIZE(self) == 0) {
2773 return return_self(self);
2774 }
Christian Heimes44720832008-05-26 13:01:01 +00002775
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002776 if (to_len == 0) {
2777 /* delete all occurances of 'from' string */
2778 if (from_len == 1) {
2779 return replace_delete_single_character(
2780 self, from_s[0], maxcount);
2781 } else {
2782 return replace_delete_substring(self, from_s, from_len, maxcount);
2783 }
2784 }
Christian Heimes44720832008-05-26 13:01:01 +00002785
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002786 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002787
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002788 if (from_len == to_len) {
2789 if (from_len == 1) {
2790 return replace_single_character_in_place(
2791 self,
2792 from_s[0],
2793 to_s[0],
2794 maxcount);
2795 } else {
2796 return replace_substring_in_place(
2797 self, from_s, from_len, to_s, to_len, maxcount);
2798 }
2799 }
Christian Heimes44720832008-05-26 13:01:01 +00002800
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002801 /* Otherwise use the more generic algorithms */
2802 if (from_len == 1) {
2803 return replace_single_character(self, from_s[0],
2804 to_s, to_len, maxcount);
2805 } else {
2806 /* len('from')>=2, len('to')>=1 */
2807 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2808 }
Christian Heimes44720832008-05-26 13:01:01 +00002809}
2810
2811PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002812"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002813\n\
2814Return a copy of string S with all occurrences of substring\n\
2815old replaced by new. If the optional argument count is\n\
2816given, only the first count occurrences are replaced.");
2817
2818static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002819string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002820{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002821 Py_ssize_t count = -1;
2822 PyObject *from, *to;
2823 const char *from_s, *to_s;
2824 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002825
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002826 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2827 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002828
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002829 if (PyString_Check(from)) {
2830 from_s = PyString_AS_STRING(from);
2831 from_len = PyString_GET_SIZE(from);
2832 }
Christian Heimes44720832008-05-26 13:01:01 +00002833#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002834 if (PyUnicode_Check(from))
2835 return PyUnicode_Replace((PyObject *)self,
2836 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002837#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002838 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2839 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002840
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002841 if (PyString_Check(to)) {
2842 to_s = PyString_AS_STRING(to);
2843 to_len = PyString_GET_SIZE(to);
2844 }
Christian Heimes44720832008-05-26 13:01:01 +00002845#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002846 else if (PyUnicode_Check(to))
2847 return PyUnicode_Replace((PyObject *)self,
2848 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002849#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002850 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2851 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002852
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002853 return (PyObject *)replace((PyStringObject *) self,
2854 from_s, from_len,
2855 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002856}
2857
2858/** End DALKE **/
2859
2860/* Matches the end (direction >= 0) or start (direction < 0) of self
2861 * against substr, using the start and end arguments. Returns
2862 * -1 on error, 0 if not found and 1 if found.
2863 */
2864Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002865_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002866 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002867{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002868 Py_ssize_t len = PyString_GET_SIZE(self);
2869 Py_ssize_t slen;
2870 const char* sub;
2871 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002872
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002873 if (PyString_Check(substr)) {
2874 sub = PyString_AS_STRING(substr);
2875 slen = PyString_GET_SIZE(substr);
2876 }
Christian Heimes44720832008-05-26 13:01:01 +00002877#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002878 else if (PyUnicode_Check(substr))
2879 return PyUnicode_Tailmatch((PyObject *)self,
2880 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002881#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002882 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2883 return -1;
2884 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002885
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002886 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002887
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002888 if (direction < 0) {
2889 /* startswith */
2890 if (start+slen > len)
2891 return 0;
2892 } else {
2893 /* endswith */
2894 if (end-start < slen || start > len)
2895 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002896
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002897 if (end-slen > start)
2898 start = end - slen;
2899 }
2900 if (end-start >= slen)
2901 return ! memcmp(str+start, sub, slen);
2902 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002903}
2904
2905
2906PyDoc_STRVAR(startswith__doc__,
2907"S.startswith(prefix[, start[, end]]) -> bool\n\
2908\n\
2909Return True if S starts with the specified prefix, False otherwise.\n\
2910With optional start, test S beginning at that position.\n\
2911With optional end, stop comparing S at that position.\n\
2912prefix can also be a tuple of strings to try.");
2913
2914static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002915string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002916{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002917 Py_ssize_t start = 0;
2918 Py_ssize_t end = PY_SSIZE_T_MAX;
2919 PyObject *subobj;
2920 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002921
Jesus Cea44e81682011-04-20 16:39:15 +02002922 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002923 return NULL;
2924 if (PyTuple_Check(subobj)) {
2925 Py_ssize_t i;
2926 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2927 result = _string_tailmatch(self,
2928 PyTuple_GET_ITEM(subobj, i),
2929 start, end, -1);
2930 if (result == -1)
2931 return NULL;
2932 else if (result) {
2933 Py_RETURN_TRUE;
2934 }
2935 }
2936 Py_RETURN_FALSE;
2937 }
2938 result = _string_tailmatch(self, subobj, start, end, -1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002939 if (result == -1) {
2940 if (PyErr_ExceptionMatches(PyExc_TypeError))
2941 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2942 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002943 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002944 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002945 else
2946 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002947}
2948
2949
2950PyDoc_STRVAR(endswith__doc__,
2951"S.endswith(suffix[, start[, end]]) -> bool\n\
2952\n\
2953Return True if S ends with the specified suffix, False otherwise.\n\
2954With optional start, test S beginning at that position.\n\
2955With optional end, stop comparing S at that position.\n\
2956suffix can also be a tuple of strings to try.");
2957
2958static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002959string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002960{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002961 Py_ssize_t start = 0;
2962 Py_ssize_t end = PY_SSIZE_T_MAX;
2963 PyObject *subobj;
2964 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002965
Jesus Cea44e81682011-04-20 16:39:15 +02002966 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002967 return NULL;
2968 if (PyTuple_Check(subobj)) {
2969 Py_ssize_t i;
2970 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2971 result = _string_tailmatch(self,
2972 PyTuple_GET_ITEM(subobj, i),
2973 start, end, +1);
2974 if (result == -1)
2975 return NULL;
2976 else if (result) {
2977 Py_RETURN_TRUE;
2978 }
2979 }
2980 Py_RETURN_FALSE;
2981 }
2982 result = _string_tailmatch(self, subobj, start, end, +1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002983 if (result == -1) {
2984 if (PyErr_ExceptionMatches(PyExc_TypeError))
2985 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2986 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002987 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002988 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002989 else
2990 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002991}
2992
2993
2994PyDoc_STRVAR(encode__doc__,
2995"S.encode([encoding[,errors]]) -> object\n\
2996\n\
2997Encodes S using the codec registered for encoding. encoding defaults\n\
2998to the default encoding. errors may be given to set a different error\n\
2999handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3000a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3001'xmlcharrefreplace' as well as any other name registered with\n\
3002codecs.register_error that is able to handle UnicodeEncodeErrors.");
3003
3004static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003005string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003006{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003007 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003008 char *encoding = NULL;
3009 char *errors = NULL;
3010 PyObject *v;
3011
Benjamin Peterson332d7212009-09-18 21:14:55 +00003012 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003013 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003014 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003015 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003016 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003017 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003018 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003019 PyErr_Format(PyExc_TypeError,
3020 "encoder did not return a string/unicode object "
3021 "(type=%.400s)",
3022 Py_TYPE(v)->tp_name);
3023 Py_DECREF(v);
3024 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003025 }
3026 return v;
3027
3028 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003029 return NULL;
3030}
3031
Christian Heimes44720832008-05-26 13:01:01 +00003032
3033PyDoc_STRVAR(decode__doc__,
3034"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003035\n\
Christian Heimes44720832008-05-26 13:01:01 +00003036Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003037to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003038handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3039a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003040as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003041able to handle UnicodeDecodeErrors.");
3042
3043static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003044string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003045{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003046 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003047 char *encoding = NULL;
3048 char *errors = NULL;
3049 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003050
Benjamin Peterson332d7212009-09-18 21:14:55 +00003051 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003052 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003053 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003054 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003055 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003056 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003057 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003058 PyErr_Format(PyExc_TypeError,
3059 "decoder did not return a string/unicode object "
3060 "(type=%.400s)",
3061 Py_TYPE(v)->tp_name);
3062 Py_DECREF(v);
3063 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003064 }
3065 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003066
Christian Heimes44720832008-05-26 13:01:01 +00003067 onError:
3068 return NULL;
3069}
3070
3071
3072PyDoc_STRVAR(expandtabs__doc__,
3073"S.expandtabs([tabsize]) -> string\n\
3074\n\
3075Return a copy of S where all tab characters are expanded using spaces.\n\
3076If tabsize is not given, a tab size of 8 characters is assumed.");
3077
3078static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003079string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003080{
3081 const char *e, *p, *qe;
3082 char *q;
3083 Py_ssize_t i, j, incr;
3084 PyObject *u;
3085 int tabsize = 8;
3086
3087 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003088 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003089
3090 /* First pass: determine size of output string */
3091 i = 0; /* chars up to and including most recent \n or \r */
3092 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003093 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3094 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003095 if (*p == '\t') {
3096 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003097 incr = tabsize - (j % tabsize);
3098 if (j > PY_SSIZE_T_MAX - incr)
3099 goto overflow1;
3100 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003101 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003102 }
3103 else {
3104 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003105 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003106 j++;
3107 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003108 if (i > PY_SSIZE_T_MAX - j)
3109 goto overflow1;
3110 i += j;
3111 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003112 }
3113 }
Christian Heimes44720832008-05-26 13:01:01 +00003114
3115 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003116 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003117
3118 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003119 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003120 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003121 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003122
3123 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003124 q = PyString_AS_STRING(u); /* next output char */
3125 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003126
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003127 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003128 if (*p == '\t') {
3129 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003130 i = tabsize - (j % tabsize);
3131 j += i;
3132 while (i--) {
3133 if (q >= qe)
3134 goto overflow2;
3135 *q++ = ' ';
3136 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003137 }
3138 }
3139 else {
3140 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003141 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003142 *q++ = *p;
3143 j++;
3144 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003145 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003146 }
Christian Heimes44720832008-05-26 13:01:01 +00003147
3148 return u;
3149
3150 overflow2:
3151 Py_DECREF(u);
3152 overflow1:
3153 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3154 return NULL;
3155}
3156
3157Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003158pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003159{
3160 PyObject *u;
3161
3162 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003163 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003164 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003165 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003166
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003167 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003168 Py_INCREF(self);
3169 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003170 }
3171
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003172 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003173 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003174 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003175 if (left)
3176 memset(PyString_AS_STRING(u), fill, left);
3177 Py_MEMCPY(PyString_AS_STRING(u) + left,
3178 PyString_AS_STRING(self),
3179 PyString_GET_SIZE(self));
3180 if (right)
3181 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3182 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003183 }
3184
3185 return u;
3186}
3187
3188PyDoc_STRVAR(ljust__doc__,
3189"S.ljust(width[, fillchar]) -> string\n"
3190"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003191"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003192"done using the specified fill character (default is a space).");
3193
3194static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003195string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003196{
3197 Py_ssize_t width;
3198 char fillchar = ' ';
3199
3200 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003201 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003202
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003203 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003204 Py_INCREF(self);
3205 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003206 }
3207
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003208 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003209}
3210
3211
3212PyDoc_STRVAR(rjust__doc__,
3213"S.rjust(width[, fillchar]) -> string\n"
3214"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003215"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003216"done using the specified fill character (default is a space)");
3217
3218static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003219string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003220{
3221 Py_ssize_t width;
3222 char fillchar = ' ';
3223
3224 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003225 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003226
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003227 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003228 Py_INCREF(self);
3229 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003230 }
3231
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003232 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003233}
3234
3235
3236PyDoc_STRVAR(center__doc__,
3237"S.center(width[, fillchar]) -> string\n"
3238"\n"
3239"Return S centered in a string of length width. Padding is\n"
3240"done using the specified fill character (default is a space)");
3241
3242static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003243string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003244{
3245 Py_ssize_t marg, left;
3246 Py_ssize_t width;
3247 char fillchar = ' ';
3248
3249 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003250 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003251
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003252 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003253 Py_INCREF(self);
3254 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003255 }
3256
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003257 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003258 left = marg / 2 + (marg & width & 1);
3259
3260 return pad(self, left, marg - left, fillchar);
3261}
3262
3263PyDoc_STRVAR(zfill__doc__,
3264"S.zfill(width) -> string\n"
3265"\n"
3266"Pad a numeric string S with zeros on the left, to fill a field\n"
3267"of the specified width. The string S is never truncated.");
3268
3269static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003270string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003271{
3272 Py_ssize_t fill;
3273 PyObject *s;
3274 char *p;
3275 Py_ssize_t width;
3276
3277 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003278 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003279
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003280 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003281 if (PyString_CheckExact(self)) {
3282 Py_INCREF(self);
3283 return (PyObject*) self;
3284 }
3285 else
3286 return PyString_FromStringAndSize(
3287 PyString_AS_STRING(self),
3288 PyString_GET_SIZE(self)
3289 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003290 }
3291
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003292 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003293
Christian Heimes44720832008-05-26 13:01:01 +00003294 s = pad(self, fill, 0, '0');
3295
3296 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003297 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003298
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003299 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003300 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003301 /* move sign to beginning of string */
3302 p[0] = p[fill];
3303 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003304 }
3305
3306 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003307}
3308
Christian Heimes44720832008-05-26 13:01:01 +00003309PyDoc_STRVAR(isspace__doc__,
3310"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003311\n\
Christian Heimes44720832008-05-26 13:01:01 +00003312Return True if all characters in S are whitespace\n\
3313and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003314
Christian Heimes44720832008-05-26 13:01:01 +00003315static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003316string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003317{
Christian Heimes44720832008-05-26 13:01:01 +00003318 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003319 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003320 register const unsigned char *e;
3321
3322 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003323 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003324 isspace(*p))
3325 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003326
3327 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003328 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003329 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003330
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003331 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003332 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003333 if (!isspace(*p))
3334 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003335 }
Christian Heimes44720832008-05-26 13:01:01 +00003336 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003337}
3338
Christian Heimes44720832008-05-26 13:01:01 +00003339
3340PyDoc_STRVAR(isalpha__doc__,
3341"S.isalpha() -> bool\n\
3342\n\
3343Return True if all characters in S are alphabetic\n\
3344and there is at least one character in S, False otherwise.");
3345
3346static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003347string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003348{
Christian Heimes44720832008-05-26 13:01:01 +00003349 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003350 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003351 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003352
Christian Heimes44720832008-05-26 13:01:01 +00003353 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003354 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003355 isalpha(*p))
3356 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003357
3358 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003359 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003360 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003361
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003362 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003363 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003364 if (!isalpha(*p))
3365 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003366 }
Christian Heimes44720832008-05-26 13:01:01 +00003367 return PyBool_FromLong(1);
3368}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003369
Christian Heimes44720832008-05-26 13:01:01 +00003370
3371PyDoc_STRVAR(isalnum__doc__,
3372"S.isalnum() -> bool\n\
3373\n\
3374Return True if all characters in S are alphanumeric\n\
3375and there is at least one character in S, False otherwise.");
3376
3377static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003378string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003379{
3380 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003381 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003382 register const unsigned char *e;
3383
3384 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003385 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003386 isalnum(*p))
3387 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003388
3389 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003390 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003391 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003392
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003393 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003394 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003395 if (!isalnum(*p))
3396 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003397 }
3398 return PyBool_FromLong(1);
3399}
3400
3401
3402PyDoc_STRVAR(isdigit__doc__,
3403"S.isdigit() -> bool\n\
3404\n\
3405Return True if all characters in S are digits\n\
3406and there is at least one character in S, False otherwise.");
3407
3408static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003409string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003410{
3411 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003412 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003413 register const unsigned char *e;
3414
3415 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003416 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003417 isdigit(*p))
3418 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003419
3420 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003421 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003422 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003423
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003424 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003425 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003426 if (!isdigit(*p))
3427 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003428 }
3429 return PyBool_FromLong(1);
3430}
3431
3432
3433PyDoc_STRVAR(islower__doc__,
3434"S.islower() -> bool\n\
3435\n\
3436Return True if all cased characters in S are lowercase and there is\n\
3437at least one cased character in S, False otherwise.");
3438
3439static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003440string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003441{
3442 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003443 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003444 register const unsigned char *e;
3445 int cased;
3446
3447 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003448 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003449 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003450
3451 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003452 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003453 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003454
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003455 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003456 cased = 0;
3457 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003458 if (isupper(*p))
3459 return PyBool_FromLong(0);
3460 else if (!cased && islower(*p))
3461 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003462 }
3463 return PyBool_FromLong(cased);
3464}
3465
3466
3467PyDoc_STRVAR(isupper__doc__,
3468"S.isupper() -> bool\n\
3469\n\
3470Return True if all cased characters in S are uppercase and there is\n\
3471at least one cased character in S, False otherwise.");
3472
3473static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003474string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003475{
3476 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003477 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003478 register const unsigned char *e;
3479 int cased;
3480
3481 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003482 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003483 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003484
3485 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003486 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003487 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003488
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003489 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003490 cased = 0;
3491 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003492 if (islower(*p))
3493 return PyBool_FromLong(0);
3494 else if (!cased && isupper(*p))
3495 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003496 }
3497 return PyBool_FromLong(cased);
3498}
3499
3500
3501PyDoc_STRVAR(istitle__doc__,
3502"S.istitle() -> bool\n\
3503\n\
3504Return True if S is a titlecased string and there is at least one\n\
3505character in S, i.e. uppercase characters may only follow uncased\n\
3506characters and lowercase characters only cased ones. Return False\n\
3507otherwise.");
3508
3509static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003510string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003511{
3512 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003513 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003514 register const unsigned char *e;
3515 int cased, previous_is_cased;
3516
3517 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003518 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003519 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003520
3521 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003522 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003523 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003524
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003525 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003526 cased = 0;
3527 previous_is_cased = 0;
3528 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003529 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003530
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003531 if (isupper(ch)) {
3532 if (previous_is_cased)
3533 return PyBool_FromLong(0);
3534 previous_is_cased = 1;
3535 cased = 1;
3536 }
3537 else if (islower(ch)) {
3538 if (!previous_is_cased)
3539 return PyBool_FromLong(0);
3540 previous_is_cased = 1;
3541 cased = 1;
3542 }
3543 else
3544 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003545 }
3546 return PyBool_FromLong(cased);
3547}
3548
3549
3550PyDoc_STRVAR(splitlines__doc__,
Raymond Hettingeraad5b022012-06-02 01:42:58 -04003551"S.splitlines(keepends=False) -> list of strings\n\
Christian Heimes44720832008-05-26 13:01:01 +00003552\n\
3553Return a list of the lines in S, breaking at line boundaries.\n\
3554Line breaks are not included in the resulting list unless keepends\n\
3555is given and true.");
3556
3557static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003558string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003559{
Christian Heimes44720832008-05-26 13:01:01 +00003560 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003561
3562 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003563 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003564
Antoine Pitrou64672132010-01-13 07:55:48 +00003565 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003566 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3567 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003568 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003569}
3570
Robert Schuppenies51df0642008-06-01 16:16:17 +00003571PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003572"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003573
3574static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003575string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003576{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003577 Py_ssize_t res;
3578 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3579 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003580}
3581
Christian Heimes1a6387e2008-03-26 12:49:49 +00003582static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003583string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003584{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003585 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003586}
3587
Christian Heimes1a6387e2008-03-26 12:49:49 +00003588
Christian Heimes44720832008-05-26 13:01:01 +00003589#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003590
Christian Heimes44720832008-05-26 13:01:01 +00003591PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003592"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003593\n\
Eric Smith6c840852010-11-06 19:43:44 +00003594Return a formatted version of S, using substitutions from args and kwargs.\n\
3595The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003596
Eric Smithdc13b792008-05-30 18:10:04 +00003597static PyObject *
3598string__format__(PyObject* self, PyObject* args)
3599{
3600 PyObject *format_spec;
3601 PyObject *result = NULL;
3602 PyObject *tmp = NULL;
3603
3604 /* If 2.x, convert format_spec to the same type as value */
3605 /* This is to allow things like u''.format('') */
3606 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003607 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003608 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003609 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3610 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3611 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003612 }
3613 tmp = PyObject_Str(format_spec);
3614 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003615 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003616 format_spec = tmp;
3617
3618 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003619 PyString_AS_STRING(format_spec),
3620 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003621done:
3622 Py_XDECREF(tmp);
3623 return result;
3624}
3625
Christian Heimes44720832008-05-26 13:01:01 +00003626PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003627"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003628\n\
Eric Smith6c840852010-11-06 19:43:44 +00003629Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003630
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003631
Christian Heimes1a6387e2008-03-26 12:49:49 +00003632static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003633string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003634 /* Counterparts of the obsolete stropmodule functions; except
3635 string.maketrans(). */
3636 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3637 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3638 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3639 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3640 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3641 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3642 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3643 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3644 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3645 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3646 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3647 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3648 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3649 capitalize__doc__},
3650 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3651 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3652 endswith__doc__},
3653 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3654 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3655 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3656 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3657 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3658 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3659 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3660 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3661 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3662 rpartition__doc__},
3663 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3664 startswith__doc__},
3665 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3666 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3667 swapcase__doc__},
3668 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3669 translate__doc__},
3670 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3671 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3672 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3673 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3674 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3675 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3676 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3677 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3678 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3679 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3680 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3681 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3682 expandtabs__doc__},
3683 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3684 splitlines__doc__},
3685 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3686 sizeof__doc__},
3687 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3688 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003689};
3690
3691static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003692str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003693
Christian Heimes44720832008-05-26 13:01:01 +00003694static PyObject *
3695string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3696{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003697 PyObject *x = NULL;
3698 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003699
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003700 if (type != &PyString_Type)
3701 return str_subtype_new(type, args, kwds);
3702 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3703 return NULL;
3704 if (x == NULL)
3705 return PyString_FromString("");
3706 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003707}
3708
3709static PyObject *
3710str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3711{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003712 PyObject *tmp, *pnew;
3713 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003714
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003715 assert(PyType_IsSubtype(type, &PyString_Type));
3716 tmp = string_new(&PyString_Type, args, kwds);
3717 if (tmp == NULL)
3718 return NULL;
3719 assert(PyString_CheckExact(tmp));
3720 n = PyString_GET_SIZE(tmp);
3721 pnew = type->tp_alloc(type, n);
3722 if (pnew != NULL) {
3723 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3724 ((PyStringObject *)pnew)->ob_shash =
3725 ((PyStringObject *)tmp)->ob_shash;
3726 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3727 }
3728 Py_DECREF(tmp);
3729 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003730}
3731
3732static PyObject *
3733basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3734{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003735 PyErr_SetString(PyExc_TypeError,
3736 "The basestring type cannot be instantiated");
3737 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003738}
3739
3740static PyObject *
3741string_mod(PyObject *v, PyObject *w)
3742{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003743 if (!PyString_Check(v)) {
3744 Py_INCREF(Py_NotImplemented);
3745 return Py_NotImplemented;
3746 }
3747 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003748}
3749
3750PyDoc_STRVAR(basestring_doc,
3751"Type basestring cannot be instantiated; it is the base for str and unicode.");
3752
3753static PyNumberMethods string_as_number = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003754 0, /*nb_add*/
3755 0, /*nb_subtract*/
3756 0, /*nb_multiply*/
3757 0, /*nb_divide*/
3758 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00003759};
3760
3761
3762PyTypeObject PyBaseString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003763 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3764 "basestring",
3765 0,
3766 0,
3767 0, /* tp_dealloc */
3768 0, /* tp_print */
3769 0, /* tp_getattr */
3770 0, /* tp_setattr */
3771 0, /* tp_compare */
3772 0, /* tp_repr */
3773 0, /* tp_as_number */
3774 0, /* tp_as_sequence */
3775 0, /* tp_as_mapping */
3776 0, /* tp_hash */
3777 0, /* tp_call */
3778 0, /* tp_str */
3779 0, /* tp_getattro */
3780 0, /* tp_setattro */
3781 0, /* tp_as_buffer */
3782 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3783 basestring_doc, /* tp_doc */
3784 0, /* tp_traverse */
3785 0, /* tp_clear */
3786 0, /* tp_richcompare */
3787 0, /* tp_weaklistoffset */
3788 0, /* tp_iter */
3789 0, /* tp_iternext */
3790 0, /* tp_methods */
3791 0, /* tp_members */
3792 0, /* tp_getset */
3793 &PyBaseObject_Type, /* tp_base */
3794 0, /* tp_dict */
3795 0, /* tp_descr_get */
3796 0, /* tp_descr_set */
3797 0, /* tp_dictoffset */
3798 0, /* tp_init */
3799 0, /* tp_alloc */
3800 basestring_new, /* tp_new */
3801 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003802};
3803
3804PyDoc_STRVAR(string_doc,
Chris Jerdonekad4b0002012-10-07 20:37:54 -07003805"str(object='') -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003806\n\
3807Return a nice string representation of the object.\n\
3808If the argument is a string, the return value is the same object.");
3809
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003810PyTypeObject PyString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003811 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3812 "str",
3813 PyStringObject_SIZE,
3814 sizeof(char),
3815 string_dealloc, /* tp_dealloc */
3816 (printfunc)string_print, /* tp_print */
3817 0, /* tp_getattr */
3818 0, /* tp_setattr */
3819 0, /* tp_compare */
3820 string_repr, /* tp_repr */
3821 &string_as_number, /* tp_as_number */
3822 &string_as_sequence, /* tp_as_sequence */
3823 &string_as_mapping, /* tp_as_mapping */
3824 (hashfunc)string_hash, /* tp_hash */
3825 0, /* tp_call */
3826 string_str, /* tp_str */
3827 PyObject_GenericGetAttr, /* tp_getattro */
3828 0, /* tp_setattro */
3829 &string_as_buffer, /* tp_as_buffer */
3830 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3831 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3832 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3833 string_doc, /* tp_doc */
3834 0, /* tp_traverse */
3835 0, /* tp_clear */
3836 (richcmpfunc)string_richcompare, /* tp_richcompare */
3837 0, /* tp_weaklistoffset */
3838 0, /* tp_iter */
3839 0, /* tp_iternext */
3840 string_methods, /* tp_methods */
3841 0, /* tp_members */
3842 0, /* tp_getset */
3843 &PyBaseString_Type, /* tp_base */
3844 0, /* tp_dict */
3845 0, /* tp_descr_get */
3846 0, /* tp_descr_set */
3847 0, /* tp_dictoffset */
3848 0, /* tp_init */
3849 0, /* tp_alloc */
3850 string_new, /* tp_new */
3851 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003852};
3853
3854void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003855PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003856{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003857 register PyObject *v;
3858 if (*pv == NULL)
3859 return;
3860 if (w == NULL || !PyString_Check(*pv)) {
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02003861 Py_CLEAR(*pv);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003862 return;
3863 }
3864 v = string_concat((PyStringObject *) *pv, w);
3865 Py_DECREF(*pv);
3866 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003867}
3868
3869void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003870PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003871{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003872 PyString_Concat(pv, w);
3873 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003874}
3875
3876
3877/* The following function breaks the notion that strings are immutable:
3878 it changes the size of a string. We get away with this only if there
3879 is only one module referencing the object. You can also think of it
3880 as creating a new string object and destroying the old one, only
3881 more efficiently. In any case, don't use this if the string may
3882 already be known to some other part of the code...
3883 Note that if there's not enough memory to resize the string, the original
3884 string object at *pv is deallocated, *pv is set to NULL, an "out of
3885 memory" exception is set, and -1 is returned. Else (on success) 0 is
3886 returned, and the value in *pv may or may not be the same as on input.
3887 As always, an extra byte is allocated for a trailing \0 byte (newsize
3888 does *not* include that), and a trailing \0 byte is stored.
3889*/
3890
3891int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003892_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003893{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003894 register PyObject *v;
3895 register PyStringObject *sv;
3896 v = *pv;
3897 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3898 PyString_CHECK_INTERNED(v)) {
3899 *pv = 0;
3900 Py_DECREF(v);
3901 PyErr_BadInternalCall();
3902 return -1;
3903 }
3904 /* XXX UNREF/NEWREF interface should be more symmetrical */
3905 _Py_DEC_REFTOTAL;
3906 _Py_ForgetReference(v);
3907 *pv = (PyObject *)
3908 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3909 if (*pv == NULL) {
3910 PyObject_Del(v);
3911 PyErr_NoMemory();
3912 return -1;
3913 }
3914 _Py_NewReference(*pv);
3915 sv = (PyStringObject *) *pv;
3916 Py_SIZE(sv) = newsize;
3917 sv->ob_sval[newsize] = '\0';
3918 sv->ob_shash = -1; /* invalidate cached hash value */
3919 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003920}
3921
3922/* Helpers for formatstring */
3923
3924Py_LOCAL_INLINE(PyObject *)
3925getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3926{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003927 Py_ssize_t argidx = *p_argidx;
3928 if (argidx < arglen) {
3929 (*p_argidx)++;
3930 if (arglen < 0)
3931 return args;
3932 else
3933 return PyTuple_GetItem(args, argidx);
3934 }
3935 PyErr_SetString(PyExc_TypeError,
3936 "not enough arguments for format string");
3937 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003938}
3939
/* Conversion flags parsed from a format specifier; each is a distinct
 * bit so that they can be OR-ed together.
 *
 *   flag       character in the format string
 *   F_LJUST    '-'
 *   F_SIGN     '+'
 *   F_BLANK    ' '
 *   F_ALT      '#'
 *   F_ZERO     '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
Christian Heimes44720832008-05-26 13:01:01 +00003952
Mark Dickinson18cfada2009-11-23 18:46:41 +00003953/* Returns a new reference to a PyString object, or NULL on failure. */
3954
3955static PyObject *
3956formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003957{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003958 char *p;
3959 PyObject *result;
3960 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003961
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003962 x = PyFloat_AsDouble(v);
3963 if (x == -1.0 && PyErr_Occurred()) {
3964 PyErr_Format(PyExc_TypeError, "float argument required, "
3965 "not %.200s", Py_TYPE(v)->tp_name);
3966 return NULL;
3967 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003968
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003969 if (prec < 0)
3970 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003971
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003972 p = PyOS_double_to_string(x, type, prec,
3973 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003974
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003975 if (p == NULL)
3976 return NULL;
3977 result = PyString_FromStringAndSize(p, strlen(p));
3978 PyMem_Free(p);
3979 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003980}
3981
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003982/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003983 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3984 * Python's regular ints.
3985 * Return value: a new PyString*, or NULL if error.
3986 * . *pbuf is set to point into it,
3987 * *plen set to the # of chars following that.
3988 * Caller must decref it when done using pbuf.
3989 * The string starting at *pbuf is of the form
3990 * "-"? ("0x" | "0X")? digit+
3991 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3992 * set in flags. The case of hex digits will be correct,
3993 * There will be at least prec digits, zero-filled on the left if
3994 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003995 * val object to be converted
3996 * flags bitmask of format flags; only F_ALT is looked at
3997 * prec minimum number of digits; 0-fill on left if needed
3998 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003999 *
4000 * CAUTION: o, x and X conversions on regular ints can never
4001 * produce a '-' sign, but can for Python's unbounded ints.
4002 */
4003PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004004_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004005 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00004006{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004007 PyObject *result = NULL;
4008 char *buf;
4009 Py_ssize_t i;
4010 int sign; /* 1 if '-', else 0 */
4011 int len; /* number of characters */
4012 Py_ssize_t llen;
4013 int numdigits; /* len == numnondigits + numdigits */
4014 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004015
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004016 switch (type) {
4017 case 'd':
4018 case 'u':
4019 result = Py_TYPE(val)->tp_str(val);
4020 break;
4021 case 'o':
4022 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4023 break;
4024 case 'x':
4025 case 'X':
4026 numnondigits = 2;
4027 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4028 break;
4029 default:
4030 assert(!"'type' not in [duoxX]");
4031 }
4032 if (!result)
4033 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004034
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004035 buf = PyString_AsString(result);
4036 if (!buf) {
4037 Py_DECREF(result);
4038 return NULL;
4039 }
Christian Heimes44720832008-05-26 13:01:01 +00004040
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004041 /* To modify the string in-place, there can only be one reference. */
4042 if (Py_REFCNT(result) != 1) {
4043 PyErr_BadInternalCall();
4044 return NULL;
4045 }
4046 llen = PyString_Size(result);
4047 if (llen > INT_MAX) {
4048 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4049 return NULL;
4050 }
4051 len = (int)llen;
4052 if (buf[len-1] == 'L') {
4053 --len;
4054 buf[len] = '\0';
4055 }
4056 sign = buf[0] == '-';
4057 numnondigits += sign;
4058 numdigits = len - numnondigits;
4059 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004060
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004061 /* Get rid of base marker unless F_ALT */
4062 if ((flags & F_ALT) == 0) {
4063 /* Need to skip 0x, 0X or 0. */
4064 int skipped = 0;
4065 switch (type) {
4066 case 'o':
4067 assert(buf[sign] == '0');
4068 /* If 0 is only digit, leave it alone. */
4069 if (numdigits > 1) {
4070 skipped = 1;
4071 --numdigits;
4072 }
4073 break;
4074 case 'x':
4075 case 'X':
4076 assert(buf[sign] == '0');
4077 assert(buf[sign + 1] == 'x');
4078 skipped = 2;
4079 numnondigits -= 2;
4080 break;
4081 }
4082 if (skipped) {
4083 buf += skipped;
4084 len -= skipped;
4085 if (sign)
4086 buf[0] = '-';
4087 }
4088 assert(len == numnondigits + numdigits);
4089 assert(numdigits > 0);
4090 }
Christian Heimes44720832008-05-26 13:01:01 +00004091
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004092 /* Fill with leading zeroes to meet minimum width. */
4093 if (prec > numdigits) {
4094 PyObject *r1 = PyString_FromStringAndSize(NULL,
4095 numnondigits + prec);
4096 char *b1;
4097 if (!r1) {
4098 Py_DECREF(result);
4099 return NULL;
4100 }
4101 b1 = PyString_AS_STRING(r1);
4102 for (i = 0; i < numnondigits; ++i)
4103 *b1++ = *buf++;
4104 for (i = 0; i < prec - numdigits; i++)
4105 *b1++ = '0';
4106 for (i = 0; i < numdigits; i++)
4107 *b1++ = *buf++;
4108 *b1 = '\0';
4109 Py_DECREF(result);
4110 result = r1;
4111 buf = PyString_AS_STRING(result);
4112 len = numnondigits + prec;
4113 }
Christian Heimes44720832008-05-26 13:01:01 +00004114
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004115 /* Fix up case for hex conversions. */
4116 if (type == 'X') {
4117 /* Need to convert all lower case letters to upper case.
4118 and need to convert 0x to 0X (and -0x to -0X). */
4119 for (i = 0; i < len; i++)
4120 if (buf[i] >= 'a' && buf[i] <= 'x')
4121 buf[i] -= 'a'-'A';
4122 }
4123 *pbuf = buf;
4124 *plen = len;
4125 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004126}
4127
4128Py_LOCAL_INLINE(int)
4129formatint(char *buf, size_t buflen, int flags,
4130 int prec, int type, PyObject *v)
4131{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004132 /* fmt = '%#.' + `prec` + 'l' + `type`
4133 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4134 + 1 + 1 = 24 */
4135 char fmt[64]; /* plenty big enough! */
4136 char *sign;
4137 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004138
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004139 x = PyInt_AsLong(v);
4140 if (x == -1 && PyErr_Occurred()) {
4141 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4142 Py_TYPE(v)->tp_name);
4143 return -1;
4144 }
4145 if (x < 0 && type == 'u') {
4146 type = 'd';
4147 }
4148 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4149 sign = "-";
4150 else
4151 sign = "";
4152 if (prec < 0)
4153 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004154
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004155 if ((flags & F_ALT) &&
4156 (type == 'x' || type == 'X')) {
4157 /* When converting under %#x or %#X, there are a number
4158 * of issues that cause pain:
4159 * - when 0 is being converted, the C standard leaves off
4160 * the '0x' or '0X', which is inconsistent with other
4161 * %#x/%#X conversions and inconsistent with Python's
4162 * hex() function
4163 * - there are platforms that violate the standard and
4164 * convert 0 with the '0x' or '0X'
4165 * (Metrowerks, Compaq Tru64)
4166 * - there are platforms that give '0x' when converting
4167 * under %#X, but convert 0 in accordance with the
4168 * standard (OS/2 EMX)
4169 *
4170 * We can achieve the desired consistency by inserting our
4171 * own '0x' or '0X' prefix, and substituting %x/%X in place
4172 * of %#x/%#X.
4173 *
4174 * Note that this is the same approach as used in
4175 * formatint() in unicodeobject.c
4176 */
4177 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4178 sign, type, prec, type);
4179 }
4180 else {
4181 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4182 sign, (flags&F_ALT) ? "#" : "",
4183 prec, type);
4184 }
Christian Heimes44720832008-05-26 13:01:01 +00004185
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004186 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4187 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4188 */
4189 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4190 PyErr_SetString(PyExc_OverflowError,
4191 "formatted integer is too long (precision too large?)");
4192 return -1;
4193 }
4194 if (sign[0])
4195 PyOS_snprintf(buf, buflen, fmt, -x);
4196 else
4197 PyOS_snprintf(buf, buflen, fmt, x);
4198 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004199}
4200
4201Py_LOCAL_INLINE(int)
4202formatchar(char *buf, size_t buflen, PyObject *v)
4203{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004204 /* presume that the buffer is at least 2 characters long */
4205 if (PyString_Check(v)) {
4206 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4207 return -1;
4208 }
4209 else {
4210 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4211 return -1;
4212 }
4213 buf[1] = '\0';
4214 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004215}
4216
4217/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4218
Mark Dickinson18cfada2009-11-23 18:46:41 +00004219 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004220 chars are formatted. XXX This is a magic number. Each formatting
4221 routine does bounds checking to ensure no overflow, but a better
4222 solution may be to malloc a buffer of appropriate size for each
4223 format. For now, the current solution is sufficient.
4224*/
4225#define FORMATBUFLEN (size_t)120
4226
4227PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004228PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004229{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004230 char *fmt, *res;
4231 Py_ssize_t arglen, argidx;
4232 Py_ssize_t reslen, rescnt, fmtcnt;
4233 int args_owned = 0;
4234 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004235#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004236 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004237#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004238 PyObject *dict = NULL;
4239 if (format == NULL || !PyString_Check(format) || args == NULL) {
4240 PyErr_BadInternalCall();
4241 return NULL;
4242 }
4243 orig_args = args;
4244 fmt = PyString_AS_STRING(format);
4245 fmtcnt = PyString_GET_SIZE(format);
4246 reslen = rescnt = fmtcnt + 100;
4247 result = PyString_FromStringAndSize((char *)NULL, reslen);
4248 if (result == NULL)
4249 return NULL;
4250 res = PyString_AsString(result);
4251 if (PyTuple_Check(args)) {
4252 arglen = PyTuple_GET_SIZE(args);
4253 argidx = 0;
4254 }
4255 else {
4256 arglen = -1;
4257 argidx = -2;
4258 }
Benjamin Petersonda2c7eb2013-03-23 22:32:00 -05004259 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4260 !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004261 dict = args;
4262 while (--fmtcnt >= 0) {
4263 if (*fmt != '%') {
4264 if (--rescnt < 0) {
4265 rescnt = fmtcnt + 100;
4266 reslen += rescnt;
4267 if (_PyString_Resize(&result, reslen))
4268 return NULL;
4269 res = PyString_AS_STRING(result)
4270 + reslen - rescnt;
4271 --rescnt;
4272 }
4273 *res++ = *fmt++;
4274 }
4275 else {
4276 /* Got a format specifier */
4277 int flags = 0;
4278 Py_ssize_t width = -1;
4279 int prec = -1;
4280 int c = '\0';
4281 int fill;
4282 int isnumok;
4283 PyObject *v = NULL;
4284 PyObject *temp = NULL;
4285 char *pbuf;
4286 int sign;
4287 Py_ssize_t len;
4288 char formatbuf[FORMATBUFLEN];
4289 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004290#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004291 char *fmt_start = fmt;
4292 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004293#endif
4294
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004295 fmt++;
4296 if (*fmt == '(') {
4297 char *keystart;
4298 Py_ssize_t keylen;
4299 PyObject *key;
4300 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004301
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004302 if (dict == NULL) {
4303 PyErr_SetString(PyExc_TypeError,
4304 "format requires a mapping");
4305 goto error;
4306 }
4307 ++fmt;
4308 --fmtcnt;
4309 keystart = fmt;
4310 /* Skip over balanced parentheses */
4311 while (pcount > 0 && --fmtcnt >= 0) {
4312 if (*fmt == ')')
4313 --pcount;
4314 else if (*fmt == '(')
4315 ++pcount;
4316 fmt++;
4317 }
4318 keylen = fmt - keystart - 1;
4319 if (fmtcnt < 0 || pcount > 0) {
4320 PyErr_SetString(PyExc_ValueError,
4321 "incomplete format key");
4322 goto error;
4323 }
4324 key = PyString_FromStringAndSize(keystart,
4325 keylen);
4326 if (key == NULL)
4327 goto error;
4328 if (args_owned) {
4329 Py_DECREF(args);
4330 args_owned = 0;
4331 }
4332 args = PyObject_GetItem(dict, key);
4333 Py_DECREF(key);
4334 if (args == NULL) {
4335 goto error;
4336 }
4337 args_owned = 1;
4338 arglen = -1;
4339 argidx = -2;
4340 }
4341 while (--fmtcnt >= 0) {
4342 switch (c = *fmt++) {
4343 case '-': flags |= F_LJUST; continue;
4344 case '+': flags |= F_SIGN; continue;
4345 case ' ': flags |= F_BLANK; continue;
4346 case '#': flags |= F_ALT; continue;
4347 case '0': flags |= F_ZERO; continue;
4348 }
4349 break;
4350 }
4351 if (c == '*') {
4352 v = getnextarg(args, arglen, &argidx);
4353 if (v == NULL)
4354 goto error;
4355 if (!PyInt_Check(v)) {
4356 PyErr_SetString(PyExc_TypeError,
4357 "* wants int");
4358 goto error;
4359 }
Serhiy Storchaka926f3a32013-01-19 23:35:46 +02004360 width = PyInt_AsSsize_t(v);
4361 if (width == -1 && PyErr_Occurred())
4362 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004363 if (width < 0) {
4364 flags |= F_LJUST;
4365 width = -width;
4366 }
4367 if (--fmtcnt >= 0)
4368 c = *fmt++;
4369 }
4370 else if (c >= 0 && isdigit(c)) {
4371 width = c - '0';
4372 while (--fmtcnt >= 0) {
4373 c = Py_CHARMASK(*fmt++);
4374 if (!isdigit(c))
4375 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004376 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004377 PyErr_SetString(
4378 PyExc_ValueError,
4379 "width too big");
4380 goto error;
4381 }
4382 width = width*10 + (c - '0');
4383 }
4384 }
4385 if (c == '.') {
4386 prec = 0;
4387 if (--fmtcnt >= 0)
4388 c = *fmt++;
4389 if (c == '*') {
4390 v = getnextarg(args, arglen, &argidx);
4391 if (v == NULL)
4392 goto error;
4393 if (!PyInt_Check(v)) {
4394 PyErr_SetString(
4395 PyExc_TypeError,
4396 "* wants int");
4397 goto error;
4398 }
Serhiy Storchaka926f3a32013-01-19 23:35:46 +02004399 prec = _PyInt_AsInt(v);
4400 if (prec == -1 && PyErr_Occurred())
4401 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004402 if (prec < 0)
4403 prec = 0;
4404 if (--fmtcnt >= 0)
4405 c = *fmt++;
4406 }
4407 else if (c >= 0 && isdigit(c)) {
4408 prec = c - '0';
4409 while (--fmtcnt >= 0) {
4410 c = Py_CHARMASK(*fmt++);
4411 if (!isdigit(c))
4412 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004413 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004414 PyErr_SetString(
4415 PyExc_ValueError,
4416 "prec too big");
4417 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004418 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004419 prec = prec*10 + (c - '0');
4420 }
4421 }
4422 } /* prec */
4423 if (fmtcnt >= 0) {
4424 if (c == 'h' || c == 'l' || c == 'L') {
4425 if (--fmtcnt >= 0)
4426 c = *fmt++;
4427 }
4428 }
4429 if (fmtcnt < 0) {
4430 PyErr_SetString(PyExc_ValueError,
4431 "incomplete format");
4432 goto error;
4433 }
4434 if (c != '%') {
4435 v = getnextarg(args, arglen, &argidx);
4436 if (v == NULL)
4437 goto error;
4438 }
4439 sign = 0;
4440 fill = ' ';
4441 switch (c) {
4442 case '%':
4443 pbuf = "%";
4444 len = 1;
4445 break;
4446 case 's':
4447#ifdef Py_USING_UNICODE
4448 if (PyUnicode_Check(v)) {
4449 fmt = fmt_start;
4450 argidx = argidx_start;
4451 goto unicode;
4452 }
4453#endif
4454 temp = _PyObject_Str(v);
4455#ifdef Py_USING_UNICODE
4456 if (temp != NULL && PyUnicode_Check(temp)) {
4457 Py_DECREF(temp);
4458 fmt = fmt_start;
4459 argidx = argidx_start;
4460 goto unicode;
4461 }
4462#endif
4463 /* Fall through */
4464 case 'r':
4465 if (c == 'r')
4466 temp = PyObject_Repr(v);
4467 if (temp == NULL)
4468 goto error;
4469 if (!PyString_Check(temp)) {
4470 PyErr_SetString(PyExc_TypeError,
4471 "%s argument has non-string str()");
4472 Py_DECREF(temp);
4473 goto error;
4474 }
4475 pbuf = PyString_AS_STRING(temp);
4476 len = PyString_GET_SIZE(temp);
4477 if (prec >= 0 && len > prec)
4478 len = prec;
4479 break;
4480 case 'i':
4481 case 'd':
4482 case 'u':
4483 case 'o':
4484 case 'x':
4485 case 'X':
4486 if (c == 'i')
4487 c = 'd';
4488 isnumok = 0;
4489 if (PyNumber_Check(v)) {
4490 PyObject *iobj=NULL;
4491
4492 if (PyInt_Check(v) || (PyLong_Check(v))) {
4493 iobj = v;
4494 Py_INCREF(iobj);
4495 }
4496 else {
4497 iobj = PyNumber_Int(v);
Benjamin Petersona708adf2013-01-02 12:21:32 -06004498 if (iobj==NULL) {
Benjamin Peterson8f53ded2013-01-02 12:25:15 -06004499 PyErr_Clear();
4500 iobj = PyNumber_Long(v);
4501 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004502 }
4503 if (iobj!=NULL) {
4504 if (PyInt_Check(iobj)) {
4505 isnumok = 1;
4506 pbuf = formatbuf;
4507 len = formatint(pbuf,
4508 sizeof(formatbuf),
4509 flags, prec, c, iobj);
4510 Py_DECREF(iobj);
4511 if (len < 0)
4512 goto error;
4513 sign = 1;
4514 }
4515 else if (PyLong_Check(iobj)) {
4516 int ilen;
4517
4518 isnumok = 1;
4519 temp = _PyString_FormatLong(iobj, flags,
4520 prec, c, &pbuf, &ilen);
4521 Py_DECREF(iobj);
4522 len = ilen;
4523 if (!temp)
4524 goto error;
4525 sign = 1;
4526 }
4527 else {
4528 Py_DECREF(iobj);
4529 }
4530 }
4531 }
4532 if (!isnumok) {
4533 PyErr_Format(PyExc_TypeError,
4534 "%%%c format: a number is required, "
4535 "not %.200s", c, Py_TYPE(v)->tp_name);
4536 goto error;
4537 }
4538 if (flags & F_ZERO)
4539 fill = '0';
4540 break;
4541 case 'e':
4542 case 'E':
4543 case 'f':
4544 case 'F':
4545 case 'g':
4546 case 'G':
4547 temp = formatfloat(v, flags, prec, c);
4548 if (temp == NULL)
4549 goto error;
4550 pbuf = PyString_AS_STRING(temp);
4551 len = PyString_GET_SIZE(temp);
4552 sign = 1;
4553 if (flags & F_ZERO)
4554 fill = '0';
4555 break;
4556 case 'c':
4557#ifdef Py_USING_UNICODE
4558 if (PyUnicode_Check(v)) {
4559 fmt = fmt_start;
4560 argidx = argidx_start;
4561 goto unicode;
4562 }
4563#endif
4564 pbuf = formatbuf;
4565 len = formatchar(pbuf, sizeof(formatbuf), v);
4566 if (len < 0)
4567 goto error;
4568 break;
4569 default:
4570 PyErr_Format(PyExc_ValueError,
4571 "unsupported format character '%c' (0x%x) "
4572 "at index %zd",
4573 c, c,
4574 (Py_ssize_t)(fmt - 1 -
4575 PyString_AsString(format)));
4576 goto error;
4577 }
4578 if (sign) {
4579 if (*pbuf == '-' || *pbuf == '+') {
4580 sign = *pbuf++;
4581 len--;
4582 }
4583 else if (flags & F_SIGN)
4584 sign = '+';
4585 else if (flags & F_BLANK)
4586 sign = ' ';
4587 else
4588 sign = 0;
4589 }
4590 if (width < len)
4591 width = len;
4592 if (rescnt - (sign != 0) < width) {
4593 reslen -= rescnt;
4594 rescnt = width + fmtcnt + 100;
4595 reslen += rescnt;
4596 if (reslen < 0) {
4597 Py_DECREF(result);
4598 Py_XDECREF(temp);
4599 return PyErr_NoMemory();
4600 }
4601 if (_PyString_Resize(&result, reslen)) {
4602 Py_XDECREF(temp);
4603 return NULL;
4604 }
4605 res = PyString_AS_STRING(result)
4606 + reslen - rescnt;
4607 }
4608 if (sign) {
4609 if (fill != ' ')
4610 *res++ = sign;
4611 rescnt--;
4612 if (width > len)
4613 width--;
4614 }
4615 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4616 assert(pbuf[0] == '0');
4617 assert(pbuf[1] == c);
4618 if (fill != ' ') {
4619 *res++ = *pbuf++;
4620 *res++ = *pbuf++;
4621 }
4622 rescnt -= 2;
4623 width -= 2;
4624 if (width < 0)
4625 width = 0;
4626 len -= 2;
4627 }
4628 if (width > len && !(flags & F_LJUST)) {
4629 do {
4630 --rescnt;
4631 *res++ = fill;
4632 } while (--width > len);
4633 }
4634 if (fill == ' ') {
4635 if (sign)
4636 *res++ = sign;
4637 if ((flags & F_ALT) &&
4638 (c == 'x' || c == 'X')) {
4639 assert(pbuf[0] == '0');
4640 assert(pbuf[1] == c);
4641 *res++ = *pbuf++;
4642 *res++ = *pbuf++;
4643 }
4644 }
4645 Py_MEMCPY(res, pbuf, len);
4646 res += len;
4647 rescnt -= len;
4648 while (--width >= len) {
4649 --rescnt;
4650 *res++ = ' ';
4651 }
4652 if (dict && (argidx < arglen) && c != '%') {
4653 PyErr_SetString(PyExc_TypeError,
4654 "not all arguments converted during string formatting");
4655 Py_XDECREF(temp);
4656 goto error;
4657 }
4658 Py_XDECREF(temp);
4659 } /* '%' */
4660 } /* until end */
4661 if (argidx < arglen && !dict) {
4662 PyErr_SetString(PyExc_TypeError,
4663 "not all arguments converted during string formatting");
4664 goto error;
4665 }
4666 if (args_owned) {
4667 Py_DECREF(args);
4668 }
4669 if (_PyString_Resize(&result, reslen - rescnt))
4670 return NULL;
4671 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004672
4673#ifdef Py_USING_UNICODE
4674 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004675 if (args_owned) {
4676 Py_DECREF(args);
4677 args_owned = 0;
4678 }
4679 /* Fiddle args right (remove the first argidx arguments) */
4680 if (PyTuple_Check(orig_args) && argidx > 0) {
4681 PyObject *v;
4682 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4683 v = PyTuple_New(n);
4684 if (v == NULL)
4685 goto error;
4686 while (--n >= 0) {
4687 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4688 Py_INCREF(w);
4689 PyTuple_SET_ITEM(v, n, w);
4690 }
4691 args = v;
4692 } else {
4693 Py_INCREF(orig_args);
4694 args = orig_args;
4695 }
4696 args_owned = 1;
4697 /* Take what we have of the result and let the Unicode formatting
4698 function format the rest of the input. */
4699 rescnt = res - PyString_AS_STRING(result);
4700 if (_PyString_Resize(&result, rescnt))
4701 goto error;
4702 fmtcnt = PyString_GET_SIZE(format) - \
4703 (fmt - PyString_AS_STRING(format));
4704 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4705 if (format == NULL)
4706 goto error;
4707 v = PyUnicode_Format(format, args);
4708 Py_DECREF(format);
4709 if (v == NULL)
4710 goto error;
4711 /* Paste what we have (result) to what the Unicode formatting
4712 function returned (v) and return the result (or error) */
4713 w = PyUnicode_Concat(result, v);
4714 Py_DECREF(result);
4715 Py_DECREF(v);
4716 Py_DECREF(args);
4717 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004718#endif /* Py_USING_UNICODE */
4719
4720 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004721 Py_DECREF(result);
4722 if (args_owned) {
4723 Py_DECREF(args);
4724 }
4725 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004726}
4727
4728void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004729PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004730{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004731 register PyStringObject *s = (PyStringObject *)(*p);
4732 PyObject *t;
4733 if (s == NULL || !PyString_Check(s))
4734 Py_FatalError("PyString_InternInPlace: strings only please!");
4735 /* If it's a string subclass, we don't really know what putting
4736 it in the interned dict might do. */
4737 if (!PyString_CheckExact(s))
4738 return;
4739 if (PyString_CHECK_INTERNED(s))
4740 return;
4741 if (interned == NULL) {
4742 interned = PyDict_New();
4743 if (interned == NULL) {
4744 PyErr_Clear(); /* Don't leave an exception */
4745 return;
4746 }
4747 }
4748 t = PyDict_GetItem(interned, (PyObject *)s);
4749 if (t) {
4750 Py_INCREF(t);
4751 Py_DECREF(*p);
4752 *p = t;
4753 return;
4754 }
Christian Heimes44720832008-05-26 13:01:01 +00004755
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004756 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4757 PyErr_Clear();
4758 return;
4759 }
4760 /* The two references in interned are not counted by refcnt.
4761 The string deallocator will take care of this */
4762 Py_REFCNT(s) -= 2;
4763 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004764}
4765
4766void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004767PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004768{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004769 PyString_InternInPlace(p);
4770 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4771 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4772 Py_INCREF(*p);
4773 }
Christian Heimes44720832008-05-26 13:01:01 +00004774}
4775
4776
4777PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004778PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004779{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004780 PyObject *s = PyString_FromString(cp);
4781 if (s == NULL)
4782 return NULL;
4783 PyString_InternInPlace(&s);
4784 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004785}
4786
4787void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004788PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004789{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004790 int i;
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02004791 for (i = 0; i < UCHAR_MAX + 1; i++)
4792 Py_CLEAR(characters[i]);
4793 Py_CLEAR(nullstring);
Christian Heimes44720832008-05-26 13:01:01 +00004794}
4795
4796void _Py_ReleaseInternedStrings(void)
4797{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004798 PyObject *keys;
4799 PyStringObject *s;
4800 Py_ssize_t i, n;
4801 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004802
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004803 if (interned == NULL || !PyDict_Check(interned))
4804 return;
4805 keys = PyDict_Keys(interned);
4806 if (keys == NULL || !PyList_Check(keys)) {
4807 PyErr_Clear();
4808 return;
4809 }
Christian Heimes44720832008-05-26 13:01:01 +00004810
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004811 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4812 detector, interned strings are not forcibly deallocated; rather, we
4813 give them their stolen references back, and then clear and DECREF
4814 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004815
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004816 n = PyList_GET_SIZE(keys);
4817 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4818 n);
4819 for (i = 0; i < n; i++) {
4820 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4821 switch (s->ob_sstate) {
4822 case SSTATE_NOT_INTERNED:
4823 /* XXX Shouldn't happen */
4824 break;
4825 case SSTATE_INTERNED_IMMORTAL:
4826 Py_REFCNT(s) += 1;
4827 immortal_size += Py_SIZE(s);
4828 break;
4829 case SSTATE_INTERNED_MORTAL:
4830 Py_REFCNT(s) += 2;
4831 mortal_size += Py_SIZE(s);
4832 break;
4833 default:
4834 Py_FatalError("Inconsistent interned string state.");
4835 }
4836 s->ob_sstate = SSTATE_NOT_INTERNED;
4837 }
4838 fprintf(stderr, "total size of all interned strings: "
4839 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4840 "mortal/immortal\n", mortal_size, immortal_size);
4841 Py_DECREF(keys);
4842 PyDict_Clear(interned);
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02004843 Py_CLEAR(interned);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004844}