blob: fb7548d33f73bb54aa9b140a0e991d6f7305e360 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000424 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000504 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
Martin v. Löwis68192102007-07-21 06:55:02 +0000524 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000536 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Martin v. Löwis68192102007-07-21 06:55:02 +0000720 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
Martin v. Löwis68192102007-07-21 06:55:02 +0000753 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000770/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
Fredrik Lundha50d2012006-05-26 17:04:58 +0000772#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000773
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000774#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000778
Fredrik Lundhb9479482006-05-26 17:22:38 +0000779#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000807 char *data = op->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +0000808 Py_ssize_t size = Py_Size(op);
Armin Rigo7ccbca92006-10-04 12:17:45 +0000809 while (size > INT_MAX) {
810 /* Very long strings cannot be written atomically.
811 * But don't write exactly INT_MAX bytes at a time
812 * to avoid memory aligment issues.
813 */
814 const int chunk_size = INT_MAX & ~0x3FFF;
815 fwrite(data, 1, chunk_size, fp);
816 data += chunk_size;
817 size -= chunk_size;
818 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000819#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000820 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000821#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000822 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000823#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000824 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826
Thomas Wouters7e474022000-07-16 12:04:32 +0000827 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828 quote = '\'';
Martin v. Löwis68192102007-07-21 06:55:02 +0000829 if (memchr(op->ob_sval, '\'', Py_Size(op)) &&
830 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 quote = '"';
832
833 fputc(quote, fp);
Martin v. Löwis68192102007-07-21 06:55:02 +0000834 for (i = 0; i < Py_Size(op); i++) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000837 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000840 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000841 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\r");
844 else if (c < ' ' || c >= 0x7f)
845 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000850 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851}
852
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000853PyObject *
854PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000856 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis68192102007-07-21 06:55:02 +0000857 size_t newsize = 2 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000858 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +0000859 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000860 PyErr_SetString(PyExc_OverflowError,
861 "string is too large to make repr");
862 }
863 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000864 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000865 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866 }
867 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000868 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 register char c;
870 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000871 int quote;
872
Thomas Wouters7e474022000-07-16 12:04:32 +0000873 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000874 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000875 if (smartquotes &&
Martin v. Löwis68192102007-07-21 06:55:02 +0000876 memchr(op->ob_sval, '\'', Py_Size(op)) &&
877 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000878 quote = '"';
879
Tim Peters9161c8b2001-12-03 01:55:38 +0000880 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 *p++ = quote;
Martin v. Löwis68192102007-07-21 06:55:02 +0000882 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000883 /* There's at least enough room for a hex escape
884 and a closing quote. */
885 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000889 else if (c == '\t')
890 *p++ = '\\', *p++ = 't';
891 else if (c == '\n')
892 *p++ = '\\', *p++ = 'n';
893 else if (c == '\r')
894 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000895 else if (c < ' ' || c >= 0x7f) {
896 /* For performance, we don't want to call
897 PyOS_snprintf here (extra layers of
898 function call). */
899 sprintf(p, "\\x%02x", c & 0xff);
900 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000901 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000902 else
903 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000904 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000905 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000906 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000909 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000910 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912}
913
Guido van Rossum189f1df2001-05-01 16:51:53 +0000914static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000915string_repr(PyObject *op)
916{
917 return PyString_Repr(op, 1);
918}
919
920static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921string_str(PyObject *s)
922{
Tim Petersc9933152001-10-16 20:18:24 +0000923 assert(PyString_Check(s));
924 if (PyString_CheckExact(s)) {
925 Py_INCREF(s);
926 return s;
927 }
928 else {
929 /* Subtype -- return genuine string with the same value. */
930 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis68192102007-07-21 06:55:02 +0000931 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000932 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000933}
934
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000936string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937{
Martin v. Löwis68192102007-07-21 06:55:02 +0000938 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939}
940
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000942string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943{
Andrew Dalke598710c2006-05-25 18:18:39 +0000944 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 register PyStringObject *op;
946 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000947#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 if (PyUnicode_Check(bb))
949 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000950#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000951 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000952 "cannot concatenate 'str' and '%.200s' objects",
Martin v. Löwis68192102007-07-21 06:55:02 +0000953 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 return NULL;
955 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000956#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000957 /* Optimize cases with empty left or right operand */
Martin v. Löwis68192102007-07-21 06:55:02 +0000958 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000959 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis68192102007-07-21 06:55:02 +0000960 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000961 Py_INCREF(bb);
962 return bb;
963 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 Py_INCREF(a);
965 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000967 size = Py_Size(a) + Py_Size(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000968 if (size < 0) {
969 PyErr_SetString(PyExc_OverflowError,
970 "strings are too large to concat");
971 return NULL;
972 }
973
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000974 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000975 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000976 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000978 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000979 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000980 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis68192102007-07-21 06:55:02 +0000981 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
982 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000983 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985#undef b
986}
987
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000989string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000991 register Py_ssize_t i;
992 register Py_ssize_t j;
993 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000995 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 if (n < 0)
997 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000998 /* watch out for overflows: the size can overflow int,
999 * and the # of bytes needed can overflow size_t
1000 */
Martin v. Löwis68192102007-07-21 06:55:02 +00001001 size = Py_Size(a) * n;
1002 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001003 PyErr_SetString(PyExc_OverflowError,
1004 "repeated string is too long");
1005 return NULL;
1006 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001007 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 Py_INCREF(a);
1009 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010 }
Tim Peterse7c05322004-06-27 17:24:49 +00001011 nbytes = (size_t)size;
1012 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001013 PyErr_SetString(PyExc_OverflowError,
1014 "repeated string is too long");
1015 return NULL;
1016 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001017 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001018 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001019 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001021 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001022 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001023 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001024 op->ob_sval[size] = '\0';
Martin v. Löwis68192102007-07-21 06:55:02 +00001025 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001026 memset(op->ob_sval, a->ob_sval[0] , n);
1027 return (PyObject *) op;
1028 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001029 i = 0;
1030 if (i < size) {
Martin v. Löwis68192102007-07-21 06:55:02 +00001031 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1032 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001033 }
1034 while (i < size) {
1035 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001036 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001037 i += j;
1038 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001039 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040}
1041
1042/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1043
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001044static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001045string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001046 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001047 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048{
1049 if (i < 0)
1050 i = 0;
1051 if (j < 0)
1052 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis68192102007-07-21 06:55:02 +00001053 if (j > Py_Size(a))
1054 j = Py_Size(a);
1055 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001056 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001057 Py_INCREF(a);
1058 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059 }
1060 if (j < i)
1061 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001062 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063}
1064
Guido van Rossum9284a572000-03-07 15:53:43 +00001065static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001066string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001067{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001068 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001069#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001070 if (PyUnicode_Check(sub_obj))
1071 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001072#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001073 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001074 PyErr_Format(PyExc_TypeError,
1075 "'in <string>' requires string as left operand, "
Martin v. Löwis68192102007-07-21 06:55:02 +00001076 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001077 return -1;
1078 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001079 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001080
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001082}
1083
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001084static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001085string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001086{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001087 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001088 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +00001089 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091 return NULL;
1092 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001093 pchar = a->ob_sval[i];
1094 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001095 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001096 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001097 else {
1098#ifdef COUNT_ALLOCS
1099 one_strings++;
1100#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001101 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001102 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001103 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001104}
1105
Martin v. Löwiscd353062001-05-24 16:56:35 +00001106static PyObject*
1107string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001108{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001109 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001110 Py_ssize_t len_a, len_b;
1111 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001112 PyObject *result;
1113
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001114 /* Make sure both arguments are strings. */
1115 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001116 result = Py_NotImplemented;
1117 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001118 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001119 if (a == b) {
1120 switch (op) {
1121 case Py_EQ:case Py_LE:case Py_GE:
1122 result = Py_True;
1123 goto out;
1124 case Py_NE:case Py_LT:case Py_GT:
1125 result = Py_False;
1126 goto out;
1127 }
1128 }
1129 if (op == Py_EQ) {
1130 /* Supporting Py_NE here as well does not save
1131 much time, since Py_NE is rarely used. */
Martin v. Löwis68192102007-07-21 06:55:02 +00001132 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001133 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis68192102007-07-21 06:55:02 +00001134 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001135 result = Py_True;
1136 } else {
1137 result = Py_False;
1138 }
1139 goto out;
1140 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001141 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001142 min_len = (len_a < len_b) ? len_a : len_b;
1143 if (min_len > 0) {
1144 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1145 if (c==0)
1146 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001147 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001148 c = 0;
1149 if (c == 0)
1150 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1151 switch (op) {
1152 case Py_LT: c = c < 0; break;
1153 case Py_LE: c = c <= 0; break;
1154 case Py_EQ: assert(0); break; /* unreachable */
1155 case Py_NE: c = c != 0; break;
1156 case Py_GT: c = c > 0; break;
1157 case Py_GE: c = c >= 0; break;
1158 default:
1159 result = Py_NotImplemented;
1160 goto out;
1161 }
1162 result = c ? Py_True : Py_False;
1163 out:
1164 Py_INCREF(result);
1165 return result;
1166}
1167
1168int
1169_PyString_Eq(PyObject *o1, PyObject *o2)
1170{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001171 PyStringObject *a = (PyStringObject*) o1;
1172 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis68192102007-07-21 06:55:02 +00001173 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001174 && *a->ob_sval == *b->ob_sval
Martin v. Löwis68192102007-07-21 06:55:02 +00001175 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001176}
1177
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178static long
Fred Drakeba096332000-07-09 07:04:36 +00001179string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001181 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001182 register unsigned char *p;
1183 register long x;
1184
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001185 if (a->ob_shash != -1)
1186 return a->ob_shash;
Martin v. Löwis68192102007-07-21 06:55:02 +00001187 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001188 p = (unsigned char *) a->ob_sval;
1189 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001190 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001191 x = (1000003*x) ^ *p++;
Martin v. Löwis68192102007-07-21 06:55:02 +00001192 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001193 if (x == -1)
1194 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001195 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001196 return x;
1197}
1198
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001199static PyObject*
1200string_subscript(PyStringObject* self, PyObject* item)
1201{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001202 if (PyIndex_Check(item)) {
1203 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204 if (i == -1 && PyErr_Occurred())
1205 return NULL;
1206 if (i < 0)
1207 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001208 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 }
1210 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001211 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001212 char* source_buf;
1213 char* result_buf;
1214 PyObject* result;
1215
Tim Petersae1d0c92006-03-17 03:29:34 +00001216 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217 PyString_GET_SIZE(self),
1218 &start, &stop, &step, &slicelength) < 0) {
1219 return NULL;
1220 }
1221
1222 if (slicelength <= 0) {
1223 return PyString_FromStringAndSize("", 0);
1224 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001225 else if (start == 0 && step == 1 &&
1226 slicelength == PyString_GET_SIZE(self) &&
1227 PyString_CheckExact(self)) {
1228 Py_INCREF(self);
1229 return (PyObject *)self;
1230 }
1231 else if (step == 1) {
1232 return PyString_FromStringAndSize(
1233 PyString_AS_STRING(self) + start,
1234 slicelength);
1235 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001236 else {
1237 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001238 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001239 if (result_buf == NULL)
1240 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001241
Tim Petersae1d0c92006-03-17 03:29:34 +00001242 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001243 cur += step, i++) {
1244 result_buf[i] = source_buf[cur];
1245 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001246
1247 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001248 slicelength);
1249 PyMem_Free(result_buf);
1250 return result;
1251 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001252 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001253 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001254 PyErr_Format(PyExc_TypeError,
1255 "string indices must be integers, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00001256 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001257 return NULL;
1258 }
1259}
1260
Martin v. Löwis18e16552006-02-15 17:27:45 +00001261static Py_ssize_t
1262string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263{
1264 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001265 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001266 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001267 return -1;
1268 }
1269 *ptr = (void *)self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001270 return Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001271}
1272
Martin v. Löwis18e16552006-02-15 17:27:45 +00001273static Py_ssize_t
1274string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001275{
Guido van Rossum045e6881997-09-08 18:30:11 +00001276 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001277 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001278 return -1;
1279}
1280
Martin v. Löwis18e16552006-02-15 17:27:45 +00001281static Py_ssize_t
1282string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001283{
1284 if ( lenp )
Martin v. Löwis68192102007-07-21 06:55:02 +00001285 *lenp = Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001286 return 1;
1287}
1288
Martin v. Löwis18e16552006-02-15 17:27:45 +00001289static Py_ssize_t
1290string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001291{
1292 if ( index != 0 ) {
1293 PyErr_SetString(PyExc_SystemError,
1294 "accessing non-existent string segment");
1295 return -1;
1296 }
1297 *ptr = self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001298 return Py_Size(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001299}
1300
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001301static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001302 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001303 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001304 (ssizeargfunc)string_repeat, /*sq_repeat*/
1305 (ssizeargfunc)string_item, /*sq_item*/
1306 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001307 0, /*sq_ass_item*/
1308 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001309 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001310};
1311
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001312static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001313 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001314 (binaryfunc)string_subscript,
1315 0,
1316};
1317
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001318static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001319 (readbufferproc)string_buffer_getreadbuf,
1320 (writebufferproc)string_buffer_getwritebuf,
1321 (segcountproc)string_buffer_getsegcount,
1322 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001323};
1324
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001325
1326
/* Selectors for the three strip variants; they also serve as indices
   into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Bare method name for selector i: the stripformat entry with its
   three leading characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001335
Andrew Dalke525eab32006-05-26 14:00:45 +00001336
/* True when the `length' bytes of `pattern' occur in `target' starting
   at `offset'.  The first and last bytes are compared directly before
   memcmp() is called on the bytes after the first.

   Don't call if length < 2: the memcmp() count is length-2.

   Every argument is parenthesized in the expansion so that expression
   arguments (e.g. `cond ? 2 : 3') are not torn apart by operator
   precedence; `offset' and `length' are still evaluated more than
   once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))
1342
1343
Andrew Dalke525eab32006-05-26 14:00:45 +00001344/* Overallocate the initial list to reduce the number of reallocs for small
1345 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1346 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1347 text (roughly 11 words per line) and field delimited data (usually 1-10
1348 fields). For large strings the split algorithms are bandwidth limited
1349 so increasing the preallocation likely will not improve things.*/
1350
/* Upper bound on the number of list slots allocated up front. */
#define MAX_PREALLOC 12

/* Number of slots to preallocate for a split limited to `maxsplit'
   splits: maxsplit+1, capped at MAX_PREALLOC.
   5 splits gives 6 elements.
   The argument is parenthesized so that expression arguments expand
   correctly; it is evaluated twice, so it must have no side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1356
/* Create the substring data[left:right] and append it to `list'.
   Relies on the expanding function declaring `str' and `list' and
   providing an `onError' label, which is jumped to when either the
   allocation or the append fails.  The temporary reference held in
   `str' is dropped on both the success and the failure path.
   Expands to several statements, so it must not be used as the
   unbraced body of an if/else or loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) != 0) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);
1368
/* Create the substring data[left:right] and store it as element number
   `count' of `list', then increment `count'.  While count is below
   MAX_PREALLOC the new string is placed directly into the list slot
   with PyList_SET_ITEM; beyond that it is appended with PyList_Append
   and the temporary reference is released.  Relies on the expanding
   function declaring `str', `list' and `count' and providing an
   `onError' label for failures. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    else if (PyList_Append(list, str) != 0) {                   \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else {                                                      \
        Py_DECREF(str);                                         \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001385
/* Always force the list to the expected size: sets the list's size
   field to the `count' variable of the expanding function. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Scanning helpers.  Each one moves the index variable `i' over the
   buffer `s' while the stated condition holds: the forward variants
   stop at `len', the reverse (R*) variants stop once `i' drops below
   0.  `i' must be a modifiable lvalue.  Arguments are parenthesized so
   that expression arguments for `s' and `len' expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1393
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001394Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001395split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396{
Andrew Dalke525eab32006-05-26 14:00:45 +00001397 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001398 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001399 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400
1401 if (list == NULL)
1402 return NULL;
1403
Andrew Dalke02758d62006-05-26 15:21:01 +00001404 i = j = 0;
1405
1406 while (maxsplit-- > 0) {
1407 SKIP_SPACE(s, i, len);
1408 if (i==len) break;
1409 j = i; i++;
1410 SKIP_NONSPACE(s, i, len);
1411 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001413
1414 if (i < len) {
1415 /* Only occurs when maxsplit was reached */
1416 /* Skip any remaining whitespace and copy to end of string */
1417 SKIP_SPACE(s, i, len);
1418 if (i != len)
1419 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001420 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001421 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001423 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424 Py_DECREF(list);
1425 return NULL;
1426}
1427
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001428Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001429split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001430{
Andrew Dalke525eab32006-05-26 14:00:45 +00001431 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001432 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001433 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001434
1435 if (list == NULL)
1436 return NULL;
1437
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001438 i = j = 0;
1439 while ((j < len) && (maxcount-- > 0)) {
1440 for(; j<len; j++) {
1441 /* I found that using memchr makes no difference */
1442 if (s[j] == ch) {
1443 SPLIT_ADD(s, i, j);
1444 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001445 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001446 }
1447 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001448 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001449 if (i <= len) {
1450 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001451 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001452 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001453 return list;
1454
1455 onError:
1456 Py_DECREF(list);
1457 return NULL;
1458}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001460PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461"S.split([sep [,maxsplit]]) -> list of strings\n\
1462\n\
1463Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001464delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001465splits are done. If sep is not specified or is None, any\n\
1466whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001467
1468static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001469string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001471 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001472 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001473 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001474 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001475#ifdef USE_FAST
1476 Py_ssize_t pos;
1477#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478
Martin v. Löwis9c830762006-04-13 08:37:17 +00001479 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001480 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001482 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001483 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001485 if (PyString_Check(subobj)) {
1486 sub = PyString_AS_STRING(subobj);
1487 n = PyString_GET_SIZE(subobj);
1488 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001489#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001490 else if (PyUnicode_Check(subobj))
1491 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001492#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001493 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1494 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001495
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496 if (n == 0) {
1497 PyErr_SetString(PyExc_ValueError, "empty separator");
1498 return NULL;
1499 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001500 else if (n == 1)
1501 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001502
Andrew Dalke525eab32006-05-26 14:00:45 +00001503 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 if (list == NULL)
1505 return NULL;
1506
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001507#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001508 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001509 while (maxsplit-- > 0) {
1510 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1511 if (pos < 0)
1512 break;
1513 j = i+pos;
1514 SPLIT_ADD(s, i, j);
1515 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001517#else
1518 i = j = 0;
1519 while ((j+n <= len) && (maxsplit-- > 0)) {
1520 for (; j+n <= len; j++) {
1521 if (Py_STRING_MATCH(s, j, sub, n)) {
1522 SPLIT_ADD(s, i, j);
1523 i = j = j + n;
1524 break;
1525 }
1526 }
1527 }
1528#endif
1529 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001530 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531 return list;
1532
Andrew Dalke525eab32006-05-26 14:00:45 +00001533 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001534 Py_DECREF(list);
1535 return NULL;
1536}
1537
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001538PyDoc_STRVAR(partition__doc__,
1539"S.partition(sep) -> (head, sep, tail)\n\
1540\n\
1541Searches for the separator sep in S, and returns the part before it,\n\
1542the separator itself, and the part after it. If the separator is not\n\
1543found, returns S and two empty strings.");
1544
1545static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001546string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001547{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001548 const char *sep;
1549 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001550
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001551 if (PyString_Check(sep_obj)) {
1552 sep = PyString_AS_STRING(sep_obj);
1553 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001554 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001555#ifdef Py_USING_UNICODE
1556 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001557 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001558#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001559 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001560 return NULL;
1561
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001562 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001563 (PyObject*) self,
1564 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1565 sep_obj, sep, sep_len
1566 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001567}
1568
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001569PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001570"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001571\n\
1572Searches for the separator sep in S, starting at the end of S, and returns\n\
1573the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001574separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001575
1576static PyObject *
1577string_rpartition(PyStringObject *self, PyObject *sep_obj)
1578{
1579 const char *sep;
1580 Py_ssize_t sep_len;
1581
1582 if (PyString_Check(sep_obj)) {
1583 sep = PyString_AS_STRING(sep_obj);
1584 sep_len = PyString_GET_SIZE(sep_obj);
1585 }
1586#ifdef Py_USING_UNICODE
1587 else if (PyUnicode_Check(sep_obj))
1588 return PyUnicode_Partition((PyObject *) self, sep_obj);
1589#endif
1590 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1591 return NULL;
1592
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001593 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001594 (PyObject*) self,
1595 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1596 sep_obj, sep, sep_len
1597 );
1598}
1599
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001600Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001601rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001602{
Andrew Dalke525eab32006-05-26 14:00:45 +00001603 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001604 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001605 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001606
1607 if (list == NULL)
1608 return NULL;
1609
Andrew Dalke02758d62006-05-26 15:21:01 +00001610 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001611
Andrew Dalke02758d62006-05-26 15:21:01 +00001612 while (maxsplit-- > 0) {
1613 RSKIP_SPACE(s, i);
1614 if (i<0) break;
1615 j = i; i--;
1616 RSKIP_NONSPACE(s, i);
1617 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001618 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001619 if (i >= 0) {
1620 /* Only occurs when maxsplit was reached */
1621 /* Skip any remaining whitespace and copy to beginning of string */
1622 RSKIP_SPACE(s, i);
1623 if (i >= 0)
1624 SPLIT_ADD(s, 0, i + 1);
1625
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001626 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001627 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001628 if (PyList_Reverse(list) < 0)
1629 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001630 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001632 Py_DECREF(list);
1633 return NULL;
1634}
1635
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001636Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001637rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001638{
Andrew Dalke525eab32006-05-26 14:00:45 +00001639 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001640 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001641 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001642
1643 if (list == NULL)
1644 return NULL;
1645
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001646 i = j = len - 1;
1647 while ((i >= 0) && (maxcount-- > 0)) {
1648 for (; i >= 0; i--) {
1649 if (s[i] == ch) {
1650 SPLIT_ADD(s, i + 1, j + 1);
1651 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001652 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001653 }
1654 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001655 }
1656 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001657 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001658 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001659 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001660 if (PyList_Reverse(list) < 0)
1661 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001662 return list;
1663
1664 onError:
1665 Py_DECREF(list);
1666 return NULL;
1667}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001668
1669PyDoc_STRVAR(rsplit__doc__,
1670"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1671\n\
1672Return a list of the words in the string S, using sep as the\n\
1673delimiter string, starting at the end of the string and working\n\
1674to the front. If maxsplit is given, at most maxsplit splits are\n\
1675done. If sep is not specified or is None, any whitespace string\n\
1676is a separator.");
1677
1678static PyObject *
1679string_rsplit(PyStringObject *self, PyObject *args)
1680{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001681 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001682 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001683 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001684 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001685
Martin v. Löwis9c830762006-04-13 08:37:17 +00001686 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001687 return NULL;
1688 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001689 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001690 if (subobj == Py_None)
1691 return rsplit_whitespace(s, len, maxsplit);
1692 if (PyString_Check(subobj)) {
1693 sub = PyString_AS_STRING(subobj);
1694 n = PyString_GET_SIZE(subobj);
1695 }
1696#ifdef Py_USING_UNICODE
1697 else if (PyUnicode_Check(subobj))
1698 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1699#endif
1700 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1701 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001702
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001703 if (n == 0) {
1704 PyErr_SetString(PyExc_ValueError, "empty separator");
1705 return NULL;
1706 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001707 else if (n == 1)
1708 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001709
Andrew Dalke525eab32006-05-26 14:00:45 +00001710 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001711 if (list == NULL)
1712 return NULL;
1713
1714 j = len;
1715 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001716
1717 while ( (i >= 0) && (maxsplit-- > 0) ) {
1718 for (; i>=0; i--) {
1719 if (Py_STRING_MATCH(s, i, sub, n)) {
1720 SPLIT_ADD(s, i + n, j);
1721 j = i;
1722 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001723 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001724 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001725 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001726 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001727 SPLIT_ADD(s, 0, j);
1728 FIX_PREALLOC_SIZE(list);
1729 if (PyList_Reverse(list) < 0)
1730 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001731 return list;
1732
Andrew Dalke525eab32006-05-26 14:00:45 +00001733onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001734 Py_DECREF(list);
1735 return NULL;
1736}
1737
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001739PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740"S.join(sequence) -> string\n\
1741\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001742Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001743sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744
1745static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001746string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747{
1748 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001749 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001752 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001753 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001754 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001755 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756
Tim Peters19fe14e2001-01-19 03:03:47 +00001757 seq = PySequence_Fast(orig, "");
1758 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001759 return NULL;
1760 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001761
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001762 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001763 if (seqlen == 0) {
1764 Py_DECREF(seq);
1765 return PyString_FromString("");
1766 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001768 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001769 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1770 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001771 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001772 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001773 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001775
Raymond Hettinger674f2412004-08-23 23:23:54 +00001776 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001777 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001778 * Do a pre-pass to figure out the total amount of space we'll
1779 * need (sz), see whether any argument is absurd, and defer to
1780 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001781 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001782 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001783 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001784 item = PySequence_Fast_GET_ITEM(seq, i);
1785 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001786#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001787 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001788 /* Defer to Unicode join.
1789 * CAUTION: There's no gurantee that the
1790 * original sequence can be iterated over
1791 * again, so we must pass seq here.
1792 */
1793 PyObject *result;
1794 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001795 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001796 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001797 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001798#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001799 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001800 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001801 " %.80s found",
Martin v. Löwis68192102007-07-21 06:55:02 +00001802 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001803 Py_DECREF(seq);
1804 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001805 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001806 sz += PyString_GET_SIZE(item);
1807 if (i != 0)
1808 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001809 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001810 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001811 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001812 Py_DECREF(seq);
1813 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001815 }
1816
1817 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001818 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001819 if (res == NULL) {
1820 Py_DECREF(seq);
1821 return NULL;
1822 }
1823
1824 /* Catenate everything. */
1825 p = PyString_AS_STRING(res);
1826 for (i = 0; i < seqlen; ++i) {
1827 size_t n;
1828 item = PySequence_Fast_GET_ITEM(seq, i);
1829 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001830 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001831 p += n;
1832 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001833 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001834 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001835 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001837
Jeremy Hylton49048292000-07-11 03:28:17 +00001838 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840}
1841
Tim Peters52e155e2001-06-16 05:42:57 +00001842PyObject *
1843_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001844{
Tim Petersa7259592001-06-16 05:11:17 +00001845 assert(sep != NULL && PyString_Check(sep));
1846 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001847 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001848}
1849
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001850Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001851string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001852{
1853 if (*end > len)
1854 *end = len;
1855 else if (*end < 0)
1856 *end += len;
1857 if (*end < 0)
1858 *end = 0;
1859 if (*start < 0)
1860 *start += len;
1861 if (*start < 0)
1862 *start = 0;
1863}
1864
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001865Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001866string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001869 const char *sub;
1870 Py_ssize_t sub_len;
1871 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001872
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001873 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1874 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001875 return -2;
1876 if (PyString_Check(subobj)) {
1877 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001878 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001880#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001882 return PyUnicode_Find(
1883 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001884#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001885 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001886 /* XXX - the "expected a character buffer object" is pretty
1887 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001888 return -2;
1889
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001890 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001891 return stringlib_find_slice(
1892 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1893 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001894 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001895 return stringlib_rfind_slice(
1896 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1897 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898}
1899
1900
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001901PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001902"S.find(sub [,start [,end]]) -> int\n\
1903\n\
1904Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001905such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906arguments start and end are interpreted as in slice notation.\n\
1907\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001908Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909
1910static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001911string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001913 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914 if (result == -2)
1915 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001916 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917}
1918
1919
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001920PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921"S.index(sub [,start [,end]]) -> int\n\
1922\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001923Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924
1925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001926string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001928 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929 if (result == -2)
1930 return NULL;
1931 if (result == -1) {
1932 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001933 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934 return NULL;
1935 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001936 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937}
1938
1939
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001940PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941"S.rfind(sub [,start [,end]]) -> int\n\
1942\n\
1943Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001944such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945arguments start and end are interpreted as in slice notation.\n\
1946\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001947Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948
1949static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001950string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001952 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953 if (result == -2)
1954 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001955 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956}
1957
1958
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001959PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960"S.rindex(sub [,start [,end]]) -> int\n\
1961\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001962Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963
1964static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001965string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001967 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968 if (result == -2)
1969 return NULL;
1970 if (result == -1) {
1971 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001972 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973 return NULL;
1974 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001975 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976}
1977
1978
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001979Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001980do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1981{
1982 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001983 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001984 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001985 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1986 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001987
1988 i = 0;
1989 if (striptype != RIGHTSTRIP) {
1990 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1991 i++;
1992 }
1993 }
1994
1995 j = len;
1996 if (striptype != LEFTSTRIP) {
1997 do {
1998 j--;
1999 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2000 j++;
2001 }
2002
2003 if (i == 0 && j == len && PyString_CheckExact(self)) {
2004 Py_INCREF(self);
2005 return (PyObject*)self;
2006 }
2007 else
2008 return PyString_FromStringAndSize(s+i, j-i);
2009}
2010
2011
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002012Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002013do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014{
2015 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002016 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002017
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018 i = 0;
2019 if (striptype != RIGHTSTRIP) {
2020 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2021 i++;
2022 }
2023 }
2024
2025 j = len;
2026 if (striptype != LEFTSTRIP) {
2027 do {
2028 j--;
2029 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2030 j++;
2031 }
2032
Tim Peters8fa5dd02001-09-12 02:18:30 +00002033 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034 Py_INCREF(self);
2035 return (PyObject*)self;
2036 }
2037 else
2038 return PyString_FromStringAndSize(s+i, j-i);
2039}
2040
2041
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002042Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002043do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2044{
2045 PyObject *sep = NULL;
2046
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002047 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002048 return NULL;
2049
2050 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002051 if (PyString_Check(sep))
2052 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002053#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002054 else if (PyUnicode_Check(sep)) {
2055 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2056 PyObject *res;
2057 if (uniself==NULL)
2058 return NULL;
2059 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2060 striptype, sep);
2061 Py_DECREF(uniself);
2062 return res;
2063 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002064#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002065 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002066#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002067 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002068#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002069 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002070#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002071 STRIPNAME(striptype));
2072 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002073 }
2074
2075 return do_strip(self, striptype);
2076}
2077
2078
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002079PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002080"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081\n\
2082Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002083whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002084If chars is given and not None, remove characters in chars instead.\n\
2085If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086
2087static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002088string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002090 if (PyTuple_GET_SIZE(args) == 0)
2091 return do_strip(self, BOTHSTRIP); /* Common case */
2092 else
2093 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094}
2095
2096
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002097PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002098"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002100Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002101If chars is given and not None, remove characters in chars instead.\n\
2102If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103
2104static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002105string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002107 if (PyTuple_GET_SIZE(args) == 0)
2108 return do_strip(self, LEFTSTRIP); /* Common case */
2109 else
2110 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111}
2112
2113
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002114PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002115"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002117Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002118If chars is given and not None, remove characters in chars instead.\n\
2119If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120
2121static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002122string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002124 if (PyTuple_GET_SIZE(args) == 0)
2125 return do_strip(self, RIGHTSTRIP); /* Common case */
2126 else
2127 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128}
2129
2130
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002131PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132"S.lower() -> string\n\
2133\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002134Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002136/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2137#ifndef _tolower
2138#define _tolower tolower
2139#endif
2140
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002142string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002144 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002145 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002146 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002148 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002149 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002151
2152 s = PyString_AS_STRING(newobj);
2153
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002154 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002155
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002157 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002158 if (isupper(c))
2159 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002161
Anthony Baxtera6286212006-04-11 07:42:36 +00002162 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163}
2164
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002165PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166"S.upper() -> string\n\
2167\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002168Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002170#ifndef _toupper
2171#define _toupper toupper
2172#endif
2173
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002175string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002177 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002178 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002179 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002181 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002182 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002184
2185 s = PyString_AS_STRING(newobj);
2186
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002187 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002188
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002190 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002191 if (islower(c))
2192 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002194
Anthony Baxtera6286212006-04-11 07:42:36 +00002195 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002196}
2197
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002198PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199"S.title() -> string\n\
2200\n\
2201Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002202characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203
2204static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002205string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206{
2207 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002208 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002209 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002210 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002211
Anthony Baxtera6286212006-04-11 07:42:36 +00002212 newobj = PyString_FromStringAndSize(NULL, n);
2213 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002214 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002215 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002216 for (i = 0; i < n; i++) {
2217 int c = Py_CHARMASK(*s++);
2218 if (islower(c)) {
2219 if (!previous_is_cased)
2220 c = toupper(c);
2221 previous_is_cased = 1;
2222 } else if (isupper(c)) {
2223 if (previous_is_cased)
2224 c = tolower(c);
2225 previous_is_cased = 1;
2226 } else
2227 previous_is_cased = 0;
2228 *s_new++ = c;
2229 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002230 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002231}
2232
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002233PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234"S.capitalize() -> string\n\
2235\n\
2236Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002237capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238
2239static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002240string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241{
2242 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002243 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002244 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245
Anthony Baxtera6286212006-04-11 07:42:36 +00002246 newobj = PyString_FromStringAndSize(NULL, n);
2247 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002249 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250 if (0 < n) {
2251 int c = Py_CHARMASK(*s++);
2252 if (islower(c))
2253 *s_new = toupper(c);
2254 else
2255 *s_new = c;
2256 s_new++;
2257 }
2258 for (i = 1; i < n; i++) {
2259 int c = Py_CHARMASK(*s++);
2260 if (isupper(c))
2261 *s_new = tolower(c);
2262 else
2263 *s_new = c;
2264 s_new++;
2265 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002266 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267}
2268
2269
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002270PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271"S.count(sub[, start[, end]]) -> int\n\
2272\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002273Return the number of non-overlapping occurrences of substring sub in\n\
2274string S[start:end]. Optional arguments start and end are interpreted\n\
2275as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002276
2277static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002278string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002279{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002280 PyObject *sub_obj;
2281 const char *str = PyString_AS_STRING(self), *sub;
2282 Py_ssize_t sub_len;
2283 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002284
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002285 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2286 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002288
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002289 if (PyString_Check(sub_obj)) {
2290 sub = PyString_AS_STRING(sub_obj);
2291 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002292 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002293#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002294 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002295 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002296 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002297 if (count == -1)
2298 return NULL;
2299 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002300 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002301 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002302#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002303 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 return NULL;
2305
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002306 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002307
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002308 return PyInt_FromSsize_t(
2309 stringlib_count(str + start, end - start, sub, sub_len)
2310 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002311}
2312
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002313PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314"S.swapcase() -> string\n\
2315\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002316Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002317converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318
2319static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002320string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321{
2322 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002323 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002324 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325
Anthony Baxtera6286212006-04-11 07:42:36 +00002326 newobj = PyString_FromStringAndSize(NULL, n);
2327 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002329 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330 for (i = 0; i < n; i++) {
2331 int c = Py_CHARMASK(*s++);
2332 if (islower(c)) {
2333 *s_new = toupper(c);
2334 }
2335 else if (isupper(c)) {
2336 *s_new = tolower(c);
2337 }
2338 else
2339 *s_new = c;
2340 s_new++;
2341 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002342 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343}
2344
2345
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002346PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347"S.translate(table [,deletechars]) -> string\n\
2348\n\
2349Return a copy of the string S, where all characters occurring\n\
2350in the optional argument deletechars are removed, and the\n\
2351remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002352translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353
2354static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002355string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002357 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002358 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002359 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002361 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002362 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002363 PyObject *result;
2364 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002367 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002370
2371 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002372 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002373 tablen = PyString_GET_SIZE(tableobj);
2374 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002375 else if (tableobj == Py_None) {
2376 table = NULL;
2377 tablen = 256;
2378 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002379#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002381 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002382 parameter; instead a mapping to None will cause characters
2383 to be deleted. */
2384 if (delobj != NULL) {
2385 PyErr_SetString(PyExc_TypeError,
2386 "deletions are implemented differently for unicode");
2387 return NULL;
2388 }
2389 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2390 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002391#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002392 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002393 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002394
Martin v. Löwis00b61272002-12-12 20:03:19 +00002395 if (tablen != 256) {
2396 PyErr_SetString(PyExc_ValueError,
2397 "translation table must be 256 characters long");
2398 return NULL;
2399 }
2400
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401 if (delobj != NULL) {
2402 if (PyString_Check(delobj)) {
2403 del_table = PyString_AS_STRING(delobj);
2404 dellen = PyString_GET_SIZE(delobj);
2405 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002406#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002407 else if (PyUnicode_Check(delobj)) {
2408 PyErr_SetString(PyExc_TypeError,
2409 "deletions are implemented differently for unicode");
2410 return NULL;
2411 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002412#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002413 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2414 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002415 }
2416 else {
2417 del_table = NULL;
2418 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419 }
2420
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002421 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422 result = PyString_FromStringAndSize((char *)NULL, inlen);
2423 if (result == NULL)
2424 return NULL;
2425 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002426 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002428 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 /* If no deletions are required, use faster code */
2430 for (i = inlen; --i >= 0; ) {
2431 c = Py_CHARMASK(*input++);
2432 if (Py_CHARMASK((*output++ = table[c])) != c)
2433 changed = 1;
2434 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002435 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436 return result;
2437 Py_DECREF(result);
2438 Py_INCREF(input_obj);
2439 return input_obj;
2440 }
2441
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002442 if (table == NULL) {
2443 for (i = 0; i < 256; i++)
2444 trans_table[i] = Py_CHARMASK(i);
2445 } else {
2446 for (i = 0; i < 256; i++)
2447 trans_table[i] = Py_CHARMASK(table[i]);
2448 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449
2450 for (i = 0; i < dellen; i++)
2451 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2452
2453 for (i = inlen; --i >= 0; ) {
2454 c = Py_CHARMASK(*input++);
2455 if (trans_table[c] != -1)
2456 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2457 continue;
2458 changed = 1;
2459 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002460 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002461 Py_DECREF(result);
2462 Py_INCREF(input_obj);
2463 return input_obj;
2464 }
2465 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002466 if (inlen > 0)
2467 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002468 return result;
2469}
2470
2471
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to `c` in the `target_len` bytes at
   `target`; evaluates to a char pointer to it, or NULL if absent. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2479
2480/* String ops must return a string. */
2481/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002482Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002483return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002484{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002485 if (PyString_CheckExact(self)) {
2486 Py_INCREF(self);
2487 return self;
2488 }
2489 return (PyStringObject *)PyString_FromStringAndSize(
2490 PyString_AS_STRING(self),
2491 PyString_GET_SIZE(self));
2492}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002493
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002494Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002495countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002496{
2497 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002498 const char *start=target;
2499 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002500
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002501 while ( (start=findchar(start, end-start, c)) != NULL ) {
2502 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002503 if (count >= maxcount)
2504 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002505 start += 1;
2506 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002507 return count;
2508}
2509
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002510Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002511findstring(const char *target, Py_ssize_t target_len,
2512 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002513 Py_ssize_t start,
2514 Py_ssize_t end,
2515 int direction)
2516{
2517 if (start < 0) {
2518 start += target_len;
2519 if (start < 0)
2520 start = 0;
2521 }
2522 if (end > target_len) {
2523 end = target_len;
2524 } else if (end < 0) {
2525 end += target_len;
2526 if (end < 0)
2527 end = 0;
2528 }
2529
2530 /* zero-length substrings always match at the first attempt */
2531 if (pattern_len == 0)
2532 return (direction > 0) ? start : end;
2533
2534 end -= pattern_len;
2535
2536 if (direction < 0) {
2537 for (; end >= start; end--)
2538 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2539 return end;
2540 } else {
2541 for (; start <= end; start++)
2542 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2543 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002544 }
2545 return -1;
2546}
2547
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002548Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002549countstring(const char *target, Py_ssize_t target_len,
2550 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002551 Py_ssize_t start,
2552 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002553 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002554{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002555 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002556
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002557 if (start < 0) {
2558 start += target_len;
2559 if (start < 0)
2560 start = 0;
2561 }
2562 if (end > target_len) {
2563 end = target_len;
2564 } else if (end < 0) {
2565 end += target_len;
2566 if (end < 0)
2567 end = 0;
2568 }
2569
2570 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002571 if (pattern_len == 0 || maxcount == 0) {
2572 if (target_len+1 < maxcount)
2573 return target_len+1;
2574 return maxcount;
2575 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002576
2577 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002578 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002579 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002580 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2581 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002582 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002583 end -= pattern_len-1;
2584 }
2585 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002586 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002587 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2588 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002589 if (--maxcount <= 0)
2590 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591 start += pattern_len-1;
2592 }
2593 }
2594 return count;
2595}
2596
2597
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002598/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002599
2600/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002601Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002602replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002603 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002604 Py_ssize_t maxcount)
2605{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002606 char *self_s, *result_s;
2607 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002608 Py_ssize_t count, i, product;
2609 PyStringObject *result;
2610
2611 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002612
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002613 /* 1 at the end plus 1 after every character */
2614 count = self_len+1;
2615 if (maxcount < count)
2616 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002617
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002618 /* Check for overflow */
2619 /* result_len = count * to_len + self_len; */
2620 product = count * to_len;
2621 if (product / to_len != count) {
2622 PyErr_SetString(PyExc_OverflowError,
2623 "replace string is too long");
2624 return NULL;
2625 }
2626 result_len = product + self_len;
2627 if (result_len < 0) {
2628 PyErr_SetString(PyExc_OverflowError,
2629 "replace string is too long");
2630 return NULL;
2631 }
2632
2633 if (! (result = (PyStringObject *)
2634 PyString_FromStringAndSize(NULL, result_len)) )
2635 return NULL;
2636
2637 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002638 result_s = PyString_AS_STRING(result);
2639
2640 /* TODO: special case single character, which doesn't need memcpy */
2641
2642 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002643 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002644 result_s += to_len;
2645 count -= 1;
2646
2647 for (i=0; i<count; i++) {
2648 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002649 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002650 result_s += to_len;
2651 }
2652
2653 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002654 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002655
2656 return result;
2657}
2658
2659/* Special case for deleting a single character */
2660/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002661Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002662replace_delete_single_character(PyStringObject *self,
2663 char from_c, Py_ssize_t maxcount)
2664{
2665 char *self_s, *result_s;
2666 char *start, *next, *end;
2667 Py_ssize_t self_len, result_len;
2668 Py_ssize_t count;
2669 PyStringObject *result;
2670
2671 self_len = PyString_GET_SIZE(self);
2672 self_s = PyString_AS_STRING(self);
2673
Andrew Dalke51324072006-05-26 20:25:22 +00002674 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002675 if (count == 0) {
2676 return return_self(self);
2677 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002678
2679 result_len = self_len - count; /* from_len == 1 */
2680 assert(result_len>=0);
2681
2682 if ( (result = (PyStringObject *)
2683 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2684 return NULL;
2685 result_s = PyString_AS_STRING(result);
2686
2687 start = self_s;
2688 end = self_s + self_len;
2689 while (count-- > 0) {
2690 next = findchar(start, end-start, from_c);
2691 if (next == NULL)
2692 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002693 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002694 result_s += (next-start);
2695 start = next+1;
2696 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002697 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002698
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002699 return result;
2700}
2701
2702/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2703
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002704Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002705replace_delete_substring(PyStringObject *self,
2706 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002707 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002708 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002709 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002710 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002711 Py_ssize_t count, offset;
2712 PyStringObject *result;
2713
2714 self_len = PyString_GET_SIZE(self);
2715 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002716
2717 count = countstring(self_s, self_len,
2718 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002719 0, self_len, 1,
2720 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002721
2722 if (count == 0) {
2723 /* no matches */
2724 return return_self(self);
2725 }
2726
2727 result_len = self_len - (count * from_len);
2728 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002729
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002730 if ( (result = (PyStringObject *)
2731 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2732 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002733
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002734 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002735
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002736 start = self_s;
2737 end = self_s + self_len;
2738 while (count-- > 0) {
2739 offset = findstring(start, end-start,
2740 from_s, from_len,
2741 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002742 if (offset == -1)
2743 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002745
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002746 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002747
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002748 result_s += (next-start);
2749 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002750 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002751 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002752 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002753}
2754
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002755/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002756Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002757replace_single_character_in_place(PyStringObject *self,
2758 char from_c, char to_c,
2759 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002760{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002761 char *self_s, *result_s, *start, *end, *next;
2762 Py_ssize_t self_len;
2763 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002764
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765 /* The result string will be the same size */
2766 self_s = PyString_AS_STRING(self);
2767 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002768
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002769 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002770
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771 if (next == NULL) {
2772 /* No matches; return the original string */
2773 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002774 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002775
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002776 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002777 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002778 if (result == NULL)
2779 return NULL;
2780 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002781 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002782
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783 /* change everything in-place, starting with this one */
2784 start = result_s + (next-self_s);
2785 *start = to_c;
2786 start++;
2787 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002788
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002789 while (--maxcount > 0) {
2790 next = findchar(start, end-start, from_c);
2791 if (next == NULL)
2792 break;
2793 *next = to_c;
2794 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002795 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002796
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002797 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002798}
2799
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002801Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002802replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002803 const char *from_s, Py_ssize_t from_len,
2804 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002805 Py_ssize_t maxcount)
2806{
2807 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002808 char *self_s;
2809 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002811
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002812 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002813
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814 self_s = PyString_AS_STRING(self);
2815 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002816
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002817 offset = findstring(self_s, self_len,
2818 from_s, from_len,
2819 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002820 if (offset == -1) {
2821 /* No matches; return the original string */
2822 return return_self(self);
2823 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002824
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002825 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002826 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 if (result == NULL)
2828 return NULL;
2829 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002830 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002831
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002832 /* change everything in-place, starting with this one */
2833 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002834 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002835 start += from_len;
2836 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002837
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002838 while ( --maxcount > 0) {
2839 offset = findstring(start, end-start,
2840 from_s, from_len,
2841 0, end-start, FORWARD);
2842 if (offset==-1)
2843 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002844 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002845 start += offset+from_len;
2846 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002847
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002848 return result;
2849}
2850
2851/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002852Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002853replace_single_character(PyStringObject *self,
2854 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002855 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002856 Py_ssize_t maxcount)
2857{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002858 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002859 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002860 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002861 Py_ssize_t count, product;
2862 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002863
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002864 self_s = PyString_AS_STRING(self);
2865 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002866
Andrew Dalke51324072006-05-26 20:25:22 +00002867 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002868 if (count == 0) {
2869 /* no matches, return unchanged */
2870 return return_self(self);
2871 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002872
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002873 /* use the difference between current and new, hence the "-1" */
2874 /* result_len = self_len + count * (to_len-1) */
2875 product = count * (to_len-1);
2876 if (product / (to_len-1) != count) {
2877 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2878 return NULL;
2879 }
2880 result_len = self_len + product;
2881 if (result_len < 0) {
2882 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2883 return NULL;
2884 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002885
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002886 if ( (result = (PyStringObject *)
2887 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2888 return NULL;
2889 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002890
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002891 start = self_s;
2892 end = self_s + self_len;
2893 while (count-- > 0) {
2894 next = findchar(start, end-start, from_c);
2895 if (next == NULL)
2896 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002897
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898 if (next == start) {
2899 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002900 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002901 result_s += to_len;
2902 start += 1;
2903 } else {
2904 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002905 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002906 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002907 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908 result_s += to_len;
2909 start = next+1;
2910 }
2911 }
2912 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002913 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002914
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002915 return result;
2916}
2917
2918/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002919Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002920replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002921 const char *from_s, Py_ssize_t from_len,
2922 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002923 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002924 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002925 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002926 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002927 Py_ssize_t count, offset, product;
2928 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002929
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002930 self_s = PyString_AS_STRING(self);
2931 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002932
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002933 count = countstring(self_s, self_len,
2934 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002935 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002936 if (count == 0) {
2937 /* no matches, return unchanged */
2938 return return_self(self);
2939 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002940
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002941 /* Check for overflow */
2942 /* result_len = self_len + count * (to_len-from_len) */
2943 product = count * (to_len-from_len);
2944 if (product / (to_len-from_len) != count) {
2945 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2946 return NULL;
2947 }
2948 result_len = self_len + product;
2949 if (result_len < 0) {
2950 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2951 return NULL;
2952 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002953
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002954 if ( (result = (PyStringObject *)
2955 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2956 return NULL;
2957 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002958
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002959 start = self_s;
2960 end = self_s + self_len;
2961 while (count-- > 0) {
2962 offset = findstring(start, end-start,
2963 from_s, from_len,
2964 0, end-start, FORWARD);
2965 if (offset == -1)
2966 break;
2967 next = start+offset;
2968 if (next == start) {
2969 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002970 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002971 result_s += to_len;
2972 start += from_len;
2973 } else {
2974 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002975 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002976 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002977 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978 result_s += to_len;
2979 start = next+from_len;
2980 }
2981 }
2982 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002983 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002984
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002985 return result;
2986}
2987
2988
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002989Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002990replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002991 const char *from_s, Py_ssize_t from_len,
2992 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002993 Py_ssize_t maxcount)
2994{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002995 if (maxcount < 0) {
2996 maxcount = PY_SSIZE_T_MAX;
2997 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2998 /* nothing to do; return the original string */
2999 return return_self(self);
3000 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003001
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003002 if (maxcount == 0 ||
3003 (from_len == 0 && to_len == 0)) {
3004 /* nothing to do; return the original string */
3005 return return_self(self);
3006 }
3007
3008 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003009
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003010 if (from_len == 0) {
3011 /* insert the 'to' string everywhere. */
3012 /* >>> "Python".replace("", ".") */
3013 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003014 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003015 }
3016
3017 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3018 /* point for an empty self string to generate a non-empty string */
3019 /* Special case so the remaining code always gets a non-empty string */
3020 if (PyString_GET_SIZE(self) == 0) {
3021 return return_self(self);
3022 }
3023
3024 if (to_len == 0) {
3025 /* delete all occurances of 'from' string */
3026 if (from_len == 1) {
3027 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003028 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003029 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003030 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003031 }
3032 }
3033
3034 /* Handle special case where both strings have the same length */
3035
3036 if (from_len == to_len) {
3037 if (from_len == 1) {
3038 return replace_single_character_in_place(
3039 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003040 from_s[0],
3041 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003042 maxcount);
3043 } else {
3044 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003045 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003046 }
3047 }
3048
3049 /* Otherwise use the more generic algorithms */
3050 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003051 return replace_single_character(self, from_s[0],
3052 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003053 } else {
3054 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003055 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003056 }
3057}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003058
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003059PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003060"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003061\n\
3062Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003063old replaced by new. If the optional argument count is\n\
3064given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003065
3066static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003067string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003068{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003069 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003070 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003071 const char *from_s, *to_s;
3072 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003073
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003074 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003075 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003076
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003077 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003078 from_s = PyString_AS_STRING(from);
3079 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003081#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003082 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003083 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003084 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003085#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003086 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003087 return NULL;
3088
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003089 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003090 to_s = PyString_AS_STRING(to);
3091 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003092 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003093#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003094 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003095 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003096 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003097#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003098 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003099 return NULL;
3100
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003101 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003102 from_s, from_len,
3103 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003104}
3105
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003106/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003107
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003108/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003109 * against substr, using the start and end arguments. Returns
3110 * -1 on error, 0 if not found and 1 if found.
3111 */
3112Py_LOCAL(int)
3113_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3114 Py_ssize_t end, int direction)
3115{
3116 Py_ssize_t len = PyString_GET_SIZE(self);
3117 Py_ssize_t slen;
3118 const char* sub;
3119 const char* str;
3120
3121 if (PyString_Check(substr)) {
3122 sub = PyString_AS_STRING(substr);
3123 slen = PyString_GET_SIZE(substr);
3124 }
3125#ifdef Py_USING_UNICODE
3126 else if (PyUnicode_Check(substr))
3127 return PyUnicode_Tailmatch((PyObject *)self,
3128 substr, start, end, direction);
3129#endif
3130 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3131 return -1;
3132 str = PyString_AS_STRING(self);
3133
3134 string_adjust_indices(&start, &end, len);
3135
3136 if (direction < 0) {
3137 /* startswith */
3138 if (start+slen > len)
3139 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003140 } else {
3141 /* endswith */
3142 if (end-start < slen || start > len)
3143 return 0;
3144
3145 if (end-slen > start)
3146 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003147 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003148 if (end-start >= slen)
3149 return ! memcmp(str+start, sub, slen);
3150 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003151}
3152
3153
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003154PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003155"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003156\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003157Return True if S starts with the specified prefix, False otherwise.\n\
3158With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003159With optional end, stop comparing S at that position.\n\
3160prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003161
3162static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003163string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003164{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003165 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003166 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003168 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003169
Guido van Rossumc6821402000-05-08 14:08:05 +00003170 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3171 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003172 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003173 if (PyTuple_Check(subobj)) {
3174 Py_ssize_t i;
3175 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3176 result = _string_tailmatch(self,
3177 PyTuple_GET_ITEM(subobj, i),
3178 start, end, -1);
3179 if (result == -1)
3180 return NULL;
3181 else if (result) {
3182 Py_RETURN_TRUE;
3183 }
3184 }
3185 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186 }
Georg Brandl24250812006-06-09 18:45:48 +00003187 result = _string_tailmatch(self, subobj, start, end, -1);
3188 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003189 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003190 else
Georg Brandl24250812006-06-09 18:45:48 +00003191 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003192}
3193
3194
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003195PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003196"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003197\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003198Return True if S ends with the specified suffix, False otherwise.\n\
3199With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003200With optional end, stop comparing S at that position.\n\
3201suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003202
3203static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003204string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003205{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003206 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003207 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003209 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003210
Guido van Rossumc6821402000-05-08 14:08:05 +00003211 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3212 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003213 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003214 if (PyTuple_Check(subobj)) {
3215 Py_ssize_t i;
3216 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3217 result = _string_tailmatch(self,
3218 PyTuple_GET_ITEM(subobj, i),
3219 start, end, +1);
3220 if (result == -1)
3221 return NULL;
3222 else if (result) {
3223 Py_RETURN_TRUE;
3224 }
3225 }
3226 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003227 }
Georg Brandl24250812006-06-09 18:45:48 +00003228 result = _string_tailmatch(self, subobj, start, end, +1);
3229 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003230 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003231 else
Georg Brandl24250812006-06-09 18:45:48 +00003232 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003233}
3234
3235
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003236PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003237"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003238\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003239Encodes S using the codec registered for encoding. encoding defaults\n\
3240to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003241handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003242a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3243'xmlcharrefreplace' as well as any other name registered with\n\
3244codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003245
3246static PyObject *
3247string_encode(PyStringObject *self, PyObject *args)
3248{
3249 char *encoding = NULL;
3250 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003251 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003252
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003253 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3254 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003255 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003256 if (v == NULL)
3257 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003258 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3259 PyErr_Format(PyExc_TypeError,
3260 "encoder did not return a string/unicode object "
3261 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003262 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003263 Py_DECREF(v);
3264 return NULL;
3265 }
3266 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003267
3268 onError:
3269 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003270}
3271
3272
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003273PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003274"S.decode([encoding[,errors]]) -> object\n\
3275\n\
3276Decodes S using the codec registered for encoding. encoding defaults\n\
3277to the default encoding. errors may be given to set a different error\n\
3278handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003279a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3280as well as any other name registerd with codecs.register_error that is\n\
3281able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003282
3283static PyObject *
3284string_decode(PyStringObject *self, PyObject *args)
3285{
3286 char *encoding = NULL;
3287 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003288 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003289
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003290 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3291 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003292 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003293 if (v == NULL)
3294 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003295 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3296 PyErr_Format(PyExc_TypeError,
3297 "decoder did not return a string/unicode object "
3298 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003299 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003300 Py_DECREF(v);
3301 return NULL;
3302 }
3303 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003304
3305 onError:
3306 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003307}
3308
3309
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003310PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003311"S.expandtabs([tabsize]) -> string\n\
3312\n\
3313Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003314If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003315
3316static PyObject*
3317string_expandtabs(PyStringObject *self, PyObject *args)
3318{
3319 const char *e, *p;
3320 char *q;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003321 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003322 PyObject *u;
3323 int tabsize = 8;
3324
3325 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3326 return NULL;
3327
Thomas Wouters7e474022000-07-16 12:04:32 +00003328 /* First pass: determine size of output string */
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003329 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003330 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3331 for (p = PyString_AS_STRING(self); p < e; p++)
3332 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003333 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003334 j += tabsize - (j % tabsize);
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003335 if (old_j > j) {
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003336 PyErr_SetString(PyExc_OverflowError,
3337 "new string is too long");
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003338 return NULL;
3339 }
3340 old_j = j;
3341 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003342 }
3343 else {
3344 j++;
3345 if (*p == '\n' || *p == '\r') {
3346 i += j;
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003347 old_j = j = 0;
3348 if (i < 0) {
3349 PyErr_SetString(PyExc_OverflowError,
3350 "new string is too long");
3351 return NULL;
3352 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003353 }
3354 }
3355
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003356 if ((i + j) < 0) {
3357 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3358 return NULL;
3359 }
3360
Guido van Rossum4c08d552000-03-10 22:55:18 +00003361 /* Second pass: create output string and fill it */
3362 u = PyString_FromStringAndSize(NULL, i + j);
3363 if (!u)
3364 return NULL;
3365
3366 j = 0;
3367 q = PyString_AS_STRING(u);
3368
3369 for (p = PyString_AS_STRING(self); p < e; p++)
3370 if (*p == '\t') {
3371 if (tabsize > 0) {
3372 i = tabsize - (j % tabsize);
3373 j += i;
3374 while (i--)
3375 *q++ = ' ';
3376 }
3377 }
3378 else {
3379 j++;
3380 *q++ = *p;
3381 if (*p == '\n' || *p == '\r')
3382 j = 0;
3383 }
3384
3385 return u;
3386}
3387
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003388Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003389pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003390{
3391 PyObject *u;
3392
3393 if (left < 0)
3394 left = 0;
3395 if (right < 0)
3396 right = 0;
3397
Tim Peters8fa5dd02001-09-12 02:18:30 +00003398 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003399 Py_INCREF(self);
3400 return (PyObject *)self;
3401 }
3402
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003403 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003404 left + PyString_GET_SIZE(self) + right);
3405 if (u) {
3406 if (left)
3407 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003408 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003409 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003410 PyString_GET_SIZE(self));
3411 if (right)
3412 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3413 fill, right);
3414 }
3415
3416 return u;
3417}
3418
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003419PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003420"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003421"\n"
3422"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003423"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003424
3425static PyObject *
3426string_ljust(PyStringObject *self, PyObject *args)
3427{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003428 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003429 char fillchar = ' ';
3430
Thomas Wouters4abb3662006-04-19 14:50:15 +00003431 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003432 return NULL;
3433
Tim Peters8fa5dd02001-09-12 02:18:30 +00003434 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003435 Py_INCREF(self);
3436 return (PyObject*) self;
3437 }
3438
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003439 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440}
3441
3442
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003443PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003444"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003445"\n"
3446"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003447"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003448
3449static PyObject *
3450string_rjust(PyStringObject *self, PyObject *args)
3451{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003452 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003453 char fillchar = ' ';
3454
Thomas Wouters4abb3662006-04-19 14:50:15 +00003455 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003456 return NULL;
3457
Tim Peters8fa5dd02001-09-12 02:18:30 +00003458 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003459 Py_INCREF(self);
3460 return (PyObject*) self;
3461 }
3462
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003463 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003464}
3465
3466
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003467PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003468"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003469"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003470"Return S centered in a string of length width. Padding is\n"
3471"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003472
3473static PyObject *
3474string_center(PyStringObject *self, PyObject *args)
3475{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003476 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003477 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003478 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003479
Thomas Wouters4abb3662006-04-19 14:50:15 +00003480 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003481 return NULL;
3482
Tim Peters8fa5dd02001-09-12 02:18:30 +00003483 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003484 Py_INCREF(self);
3485 return (PyObject*) self;
3486 }
3487
3488 marg = width - PyString_GET_SIZE(self);
3489 left = marg / 2 + (marg & width & 1);
3490
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003491 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003492}
3493
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003494PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003495"S.zfill(width) -> string\n"
3496"\n"
3497"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003498"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003499
3500static PyObject *
3501string_zfill(PyStringObject *self, PyObject *args)
3502{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003503 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003504 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003505 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003506 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003507
Thomas Wouters4abb3662006-04-19 14:50:15 +00003508 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003509 return NULL;
3510
3511 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003512 if (PyString_CheckExact(self)) {
3513 Py_INCREF(self);
3514 return (PyObject*) self;
3515 }
3516 else
3517 return PyString_FromStringAndSize(
3518 PyString_AS_STRING(self),
3519 PyString_GET_SIZE(self)
3520 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003521 }
3522
3523 fill = width - PyString_GET_SIZE(self);
3524
3525 s = pad(self, fill, 0, '0');
3526
3527 if (s == NULL)
3528 return NULL;
3529
3530 p = PyString_AS_STRING(s);
3531 if (p[fill] == '+' || p[fill] == '-') {
3532 /* move sign to beginning of string */
3533 p[0] = p[fill];
3534 p[fill] = '0';
3535 }
3536
3537 return (PyObject*) s;
3538}
3539
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003540PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003541"S.isspace() -> bool\n\
3542\n\
3543Return True if all characters in S are whitespace\n\
3544and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003545
3546static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003547string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003548{
Fred Drakeba096332000-07-09 07:04:36 +00003549 register const unsigned char *p
3550 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003551 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003552
Guido van Rossum4c08d552000-03-10 22:55:18 +00003553 /* Shortcut for single character strings */
3554 if (PyString_GET_SIZE(self) == 1 &&
3555 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003556 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003557
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003558 /* Special case for empty strings */
3559 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003560 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003561
Guido van Rossum4c08d552000-03-10 22:55:18 +00003562 e = p + PyString_GET_SIZE(self);
3563 for (; p < e; p++) {
3564 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003565 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003567 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003568}
3569
3570
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003571PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003572"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003573\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003574Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003575and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003576
3577static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003578string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003579{
Fred Drakeba096332000-07-09 07:04:36 +00003580 register const unsigned char *p
3581 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003582 register const unsigned char *e;
3583
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003584 /* Shortcut for single character strings */
3585 if (PyString_GET_SIZE(self) == 1 &&
3586 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003587 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003588
3589 /* Special case for empty strings */
3590 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003591 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003592
3593 e = p + PyString_GET_SIZE(self);
3594 for (; p < e; p++) {
3595 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003596 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003597 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003598 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003599}
3600
3601
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003602PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003603"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003604\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003605Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003606and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003607
3608static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003609string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003610{
Fred Drakeba096332000-07-09 07:04:36 +00003611 register const unsigned char *p
3612 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003613 register const unsigned char *e;
3614
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003615 /* Shortcut for single character strings */
3616 if (PyString_GET_SIZE(self) == 1 &&
3617 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003618 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003619
3620 /* Special case for empty strings */
3621 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003622 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003623
3624 e = p + PyString_GET_SIZE(self);
3625 for (; p < e; p++) {
3626 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003627 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003628 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003629 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003630}
3631
3632
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003633PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003634"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003636Return True if all characters in S are digits\n\
3637and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638
3639static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003640string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641{
Fred Drakeba096332000-07-09 07:04:36 +00003642 register const unsigned char *p
3643 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003644 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003645
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646 /* Shortcut for single character strings */
3647 if (PyString_GET_SIZE(self) == 1 &&
3648 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003649 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003651 /* Special case for empty strings */
3652 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003653 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003654
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655 e = p + PyString_GET_SIZE(self);
3656 for (; p < e; p++) {
3657 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003658 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003660 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661}
3662
3663
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003664PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003665"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003667Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003668at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669
3670static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003671string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672{
Fred Drakeba096332000-07-09 07:04:36 +00003673 register const unsigned char *p
3674 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003675 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676 int cased;
3677
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678 /* Shortcut for single character strings */
3679 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003680 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003682 /* Special case for empty strings */
3683 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003684 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003685
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686 e = p + PyString_GET_SIZE(self);
3687 cased = 0;
3688 for (; p < e; p++) {
3689 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003690 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691 else if (!cased && islower(*p))
3692 cased = 1;
3693 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003694 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695}
3696
3697
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003698PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003699"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003701Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003702at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703
3704static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003705string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003706{
Fred Drakeba096332000-07-09 07:04:36 +00003707 register const unsigned char *p
3708 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003709 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710 int cased;
3711
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 /* Shortcut for single character strings */
3713 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003714 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003716 /* Special case for empty strings */
3717 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003718 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003719
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720 e = p + PyString_GET_SIZE(self);
3721 cased = 0;
3722 for (; p < e; p++) {
3723 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003724 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725 else if (!cased && isupper(*p))
3726 cased = 1;
3727 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003728 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729}
3730
3731
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003732PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003733"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003735Return True if S is a titlecased string and there is at least one\n\
3736character in S, i.e. uppercase characters may only follow uncased\n\
3737characters and lowercase characters only cased ones. Return False\n\
3738otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739
3740static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003741string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742{
Fred Drakeba096332000-07-09 07:04:36 +00003743 register const unsigned char *p
3744 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003745 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746 int cased, previous_is_cased;
3747
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748 /* Shortcut for single character strings */
3749 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003750 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003752 /* Special case for empty strings */
3753 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003755
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756 e = p + PyString_GET_SIZE(self);
3757 cased = 0;
3758 previous_is_cased = 0;
3759 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003760 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761
3762 if (isupper(ch)) {
3763 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003764 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765 previous_is_cased = 1;
3766 cased = 1;
3767 }
3768 else if (islower(ch)) {
3769 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003770 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003771 previous_is_cased = 1;
3772 cased = 1;
3773 }
3774 else
3775 previous_is_cased = 0;
3776 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003777 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003778}
3779
3780
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003781PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003782"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783\n\
3784Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003785Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003786is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787
Guido van Rossum4c08d552000-03-10 22:55:18 +00003788static PyObject*
3789string_splitlines(PyStringObject *self, PyObject *args)
3790{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003791 register Py_ssize_t i;
3792 register Py_ssize_t j;
3793 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003794 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003795 PyObject *list;
3796 PyObject *str;
3797 char *data;
3798
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003799 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800 return NULL;
3801
3802 data = PyString_AS_STRING(self);
3803 len = PyString_GET_SIZE(self);
3804
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003805 /* This does not use the preallocated list because splitlines is
3806 usually run with hundreds of newlines. The overhead of
3807 switching between PyList_SET_ITEM and append causes about a
3808 2-3% slowdown for that common case. A smarter implementation
3809 could move the if check out, so the SET_ITEMs are done first
3810 and the appends only done when the prealloc buffer is full.
3811 That's too much work for little gain.*/
3812
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813 list = PyList_New(0);
3814 if (!list)
3815 goto onError;
3816
3817 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003818 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003819
Guido van Rossum4c08d552000-03-10 22:55:18 +00003820 /* Find a line and append it */
3821 while (i < len && data[i] != '\n' && data[i] != '\r')
3822 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003823
3824 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003825 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003826 if (i < len) {
3827 if (data[i] == '\r' && i + 1 < len &&
3828 data[i+1] == '\n')
3829 i += 2;
3830 else
3831 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003832 if (keepends)
3833 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003834 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003835 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836 j = i;
3837 }
3838 if (j < len) {
3839 SPLIT_APPEND(data, j, len);
3840 }
3841
3842 return list;
3843
3844 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003845 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003846 return NULL;
3847}
3848
3849#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003850#undef SPLIT_ADD
3851#undef MAX_PREALLOC
3852#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003853
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003854static PyObject *
3855string_getnewargs(PyStringObject *v)
3856{
Martin v. Löwis68192102007-07-21 06:55:02 +00003857 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003858}
3859
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003860
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003861static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003862string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003863 /* Counterparts of the obsolete stropmodule functions; except
3864 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003865 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3866 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003867 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003868 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3869 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003870 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3871 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3872 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3873 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3874 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3875 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3876 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003877 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3878 capitalize__doc__},
3879 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3880 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3881 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003882 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003883 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3884 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3885 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3886 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3887 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3888 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3889 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003890 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3891 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003892 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3893 startswith__doc__},
3894 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3895 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3896 swapcase__doc__},
3897 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3898 translate__doc__},
3899 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3900 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3901 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3902 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3903 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3904 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3905 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3906 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3907 expandtabs__doc__},
3908 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3909 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003910 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003911 {NULL, NULL} /* sentinel */
3912};
3913
Jeremy Hylton938ace62002-07-17 16:30:39 +00003914static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003915str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3916
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003917static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003918string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003919{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003920 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003921 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003922
Guido van Rossumae960af2001-08-30 03:11:59 +00003923 if (type != &PyString_Type)
3924 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003925 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3926 return NULL;
3927 if (x == NULL)
3928 return PyString_FromString("");
3929 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003930}
3931
Guido van Rossumae960af2001-08-30 03:11:59 +00003932static PyObject *
3933str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3934{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003935 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003936 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003937
3938 assert(PyType_IsSubtype(type, &PyString_Type));
3939 tmp = string_new(&PyString_Type, args, kwds);
3940 if (tmp == NULL)
3941 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003942 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003943 n = PyString_GET_SIZE(tmp);
3944 pnew = type->tp_alloc(type, n);
3945 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003946 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003947 ((PyStringObject *)pnew)->ob_shash =
3948 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003949 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003950 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003951 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003952 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003953}
3954
Guido van Rossumcacfc072002-05-24 19:01:59 +00003955static PyObject *
3956basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3957{
3958 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003959 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003960 return NULL;
3961}
3962
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003963static PyObject *
3964string_mod(PyObject *v, PyObject *w)
3965{
3966 if (!PyString_Check(v)) {
3967 Py_INCREF(Py_NotImplemented);
3968 return Py_NotImplemented;
3969 }
3970 return PyString_Format(v, w);
3971}
3972
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003973PyDoc_STRVAR(basestring_doc,
3974"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003975
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003976static PyNumberMethods string_as_number = {
3977 0, /*nb_add*/
3978 0, /*nb_subtract*/
3979 0, /*nb_multiply*/
3980 0, /*nb_divide*/
3981 string_mod, /*nb_remainder*/
3982};
3983
3984
Guido van Rossumcacfc072002-05-24 19:01:59 +00003985PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00003986 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003987 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003988 0,
3989 0,
3990 0, /* tp_dealloc */
3991 0, /* tp_print */
3992 0, /* tp_getattr */
3993 0, /* tp_setattr */
3994 0, /* tp_compare */
3995 0, /* tp_repr */
3996 0, /* tp_as_number */
3997 0, /* tp_as_sequence */
3998 0, /* tp_as_mapping */
3999 0, /* tp_hash */
4000 0, /* tp_call */
4001 0, /* tp_str */
4002 0, /* tp_getattro */
4003 0, /* tp_setattro */
4004 0, /* tp_as_buffer */
4005 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4006 basestring_doc, /* tp_doc */
4007 0, /* tp_traverse */
4008 0, /* tp_clear */
4009 0, /* tp_richcompare */
4010 0, /* tp_weaklistoffset */
4011 0, /* tp_iter */
4012 0, /* tp_iternext */
4013 0, /* tp_methods */
4014 0, /* tp_members */
4015 0, /* tp_getset */
4016 &PyBaseObject_Type, /* tp_base */
4017 0, /* tp_dict */
4018 0, /* tp_descr_get */
4019 0, /* tp_descr_set */
4020 0, /* tp_dictoffset */
4021 0, /* tp_init */
4022 0, /* tp_alloc */
4023 basestring_new, /* tp_new */
4024 0, /* tp_free */
4025};
4026
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004027PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004028"str(object) -> string\n\
4029\n\
4030Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004031If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004032
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004033PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004034 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004035 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004036 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004037 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004038 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004039 (printfunc)string_print, /* tp_print */
4040 0, /* tp_getattr */
4041 0, /* tp_setattr */
4042 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004043 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004044 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004045 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004046 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004047 (hashfunc)string_hash, /* tp_hash */
4048 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004049 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004050 PyObject_GenericGetAttr, /* tp_getattro */
4051 0, /* tp_setattro */
4052 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004053 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neal Norwitzee3a1b52007-02-25 19:44:48 +00004054 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004055 string_doc, /* tp_doc */
4056 0, /* tp_traverse */
4057 0, /* tp_clear */
4058 (richcmpfunc)string_richcompare, /* tp_richcompare */
4059 0, /* tp_weaklistoffset */
4060 0, /* tp_iter */
4061 0, /* tp_iternext */
4062 string_methods, /* tp_methods */
4063 0, /* tp_members */
4064 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004065 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004066 0, /* tp_dict */
4067 0, /* tp_descr_get */
4068 0, /* tp_descr_set */
4069 0, /* tp_dictoffset */
4070 0, /* tp_init */
4071 0, /* tp_alloc */
4072 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004073 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004074};
4075
4076void
Fred Drakeba096332000-07-09 07:04:36 +00004077PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004078{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004079 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004080 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004081 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004082 if (w == NULL || !PyString_Check(*pv)) {
4083 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004084 *pv = NULL;
4085 return;
4086 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004087 v = string_concat((PyStringObject *) *pv, w);
4088 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004089 *pv = v;
4090}
4091
Guido van Rossum013142a1994-08-30 08:19:36 +00004092void
Fred Drakeba096332000-07-09 07:04:36 +00004093PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004094{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004095 PyString_Concat(pv, w);
4096 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004097}
4098
4099
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004100/* The following function breaks the notion that strings are immutable:
4101 it changes the size of a string. We get away with this only if there
4102 is only one module referencing the object. You can also think of it
4103 as creating a new string object and destroying the old one, only
4104 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004105 already be known to some other part of the code...
4106 Note that if there's not enough memory to resize the string, the original
4107 string object at *pv is deallocated, *pv is set to NULL, an "out of
4108 memory" exception is set, and -1 is returned. Else (on success) 0 is
4109 returned, and the value in *pv may or may not be the same as on input.
4110 As always, an extra byte is allocated for a trailing \0 byte (newsize
4111 does *not* include that), and a trailing \0 byte is stored.
4112*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004113
4114int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004115_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004116{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004117 register PyObject *v;
4118 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004119 v = *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004120 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004121 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004122 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004123 Py_DECREF(v);
4124 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004125 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004126 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004127 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004128 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004129 _Py_ForgetReference(v);
4130 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004131 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004132 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004133 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004134 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004135 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004136 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004137 _Py_NewReference(*pv);
4138 sv = (PyStringObject *) *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004139 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004140 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004141 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004142 return 0;
4143}
Guido van Rossume5372401993-03-16 12:15:04 +00004144
4145/* Helpers for formatstring */
4146
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004147Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004148getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004149{
Thomas Wouters977485d2006-02-16 15:59:12 +00004150 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004151 if (argidx < arglen) {
4152 (*p_argidx)++;
4153 if (arglen < 0)
4154 return args;
4155 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004156 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004157 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004158 PyErr_SetString(PyExc_TypeError,
4159 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004160 return NULL;
4161}
4162
/* Format flag bits; each is selected by the flag character shown. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4175
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004176Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004177formatfloat(char *buf, size_t buflen, int flags,
4178 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004179{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004180 /* fmt = '%#.' + `prec` + `type`
4181 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004182 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004183 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004184 x = PyFloat_AsDouble(v);
4185 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004186 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis68192102007-07-21 06:55:02 +00004187 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004188 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004189 }
Guido van Rossume5372401993-03-16 12:15:04 +00004190 if (prec < 0)
4191 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004192 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4193 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004194 /* Worst case length calc to ensure no buffer overrun:
4195
4196 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004197 fmt = %#.<prec>g
4198 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004199 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004200 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004201
4202 'f' formats:
4203 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4204 len = 1 + 50 + 1 + prec = 52 + prec
4205
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004206 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004207 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004208
4209 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004210 if (((type == 'g' || type == 'G') &&
4211 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004212 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004213 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004214 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004215 return -1;
4216 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004217 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4218 (flags&F_ALT) ? "#" : "",
4219 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004220 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004221 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004222}
4223
Tim Peters38fd5b62000-09-21 05:43:11 +00004224/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4225 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4226 * Python's regular ints.
4227 * Return value: a new PyString*, or NULL if error.
4228 * . *pbuf is set to point into it,
4229 * *plen set to the # of chars following that.
4230 * Caller must decref it when done using pbuf.
4231 * The string starting at *pbuf is of the form
4232 * "-"? ("0x" | "0X")? digit+
4233 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004234 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004235 * There will be at least prec digits, zero-filled on the left if
4236 * necessary to get that many.
4237 * val object to be converted
4238 * flags bitmask of format flags; only F_ALT is looked at
4239 * prec minimum number of digits; 0-fill on left if needed
4240 * type a character in [duoxX]; u acts the same as d
4241 *
4242 * CAUTION: o, x and X conversions on regular ints can never
4243 * produce a '-' sign, but can for Python's unbounded ints.
4244 */
4245PyObject*
4246_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4247 char **pbuf, int *plen)
4248{
4249 PyObject *result = NULL;
4250 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004251 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004252 int sign; /* 1 if '-', else 0 */
4253 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004254 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004255 int numdigits; /* len == numnondigits + numdigits */
4256 int numnondigits = 0;
4257
4258 switch (type) {
4259 case 'd':
4260 case 'u':
Martin v. Löwis68192102007-07-21 06:55:02 +00004261 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004262 break;
4263 case 'o':
Martin v. Löwis68192102007-07-21 06:55:02 +00004264 result = Py_Type(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004265 break;
4266 case 'x':
4267 case 'X':
4268 numnondigits = 2;
Martin v. Löwis68192102007-07-21 06:55:02 +00004269 result = Py_Type(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004270 break;
4271 default:
4272 assert(!"'type' not in [duoxX]");
4273 }
4274 if (!result)
4275 return NULL;
4276
Neal Norwitz56423e52006-08-13 18:11:08 +00004277 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004278 if (!buf) {
4279 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004280 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004281 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004282
Tim Peters38fd5b62000-09-21 05:43:11 +00004283 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis68192102007-07-21 06:55:02 +00004284 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004285 PyErr_BadInternalCall();
4286 return NULL;
4287 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004288 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004289 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004290 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4291 return NULL;
4292 }
4293 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004294 if (buf[len-1] == 'L') {
4295 --len;
4296 buf[len] = '\0';
4297 }
4298 sign = buf[0] == '-';
4299 numnondigits += sign;
4300 numdigits = len - numnondigits;
4301 assert(numdigits > 0);
4302
Tim Petersfff53252001-04-12 18:38:48 +00004303 /* Get rid of base marker unless F_ALT */
4304 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004305 /* Need to skip 0x, 0X or 0. */
4306 int skipped = 0;
4307 switch (type) {
4308 case 'o':
4309 assert(buf[sign] == '0');
4310 /* If 0 is only digit, leave it alone. */
4311 if (numdigits > 1) {
4312 skipped = 1;
4313 --numdigits;
4314 }
4315 break;
4316 case 'x':
4317 case 'X':
4318 assert(buf[sign] == '0');
4319 assert(buf[sign + 1] == 'x');
4320 skipped = 2;
4321 numnondigits -= 2;
4322 break;
4323 }
4324 if (skipped) {
4325 buf += skipped;
4326 len -= skipped;
4327 if (sign)
4328 buf[0] = '-';
4329 }
4330 assert(len == numnondigits + numdigits);
4331 assert(numdigits > 0);
4332 }
4333
4334 /* Fill with leading zeroes to meet minimum width. */
4335 if (prec > numdigits) {
4336 PyObject *r1 = PyString_FromStringAndSize(NULL,
4337 numnondigits + prec);
4338 char *b1;
4339 if (!r1) {
4340 Py_DECREF(result);
4341 return NULL;
4342 }
4343 b1 = PyString_AS_STRING(r1);
4344 for (i = 0; i < numnondigits; ++i)
4345 *b1++ = *buf++;
4346 for (i = 0; i < prec - numdigits; i++)
4347 *b1++ = '0';
4348 for (i = 0; i < numdigits; i++)
4349 *b1++ = *buf++;
4350 *b1 = '\0';
4351 Py_DECREF(result);
4352 result = r1;
4353 buf = PyString_AS_STRING(result);
4354 len = numnondigits + prec;
4355 }
4356
4357 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004358 if (type == 'X') {
4359 /* Need to convert all lower case letters to upper case.
4360 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004361 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004362 if (buf[i] >= 'a' && buf[i] <= 'x')
4363 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004364 }
4365 *pbuf = buf;
4366 *plen = len;
4367 return result;
4368}
4369
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004370Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004371formatint(char *buf, size_t buflen, int flags,
4372 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004373{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004374 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004375 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4376 + 1 + 1 = 24 */
4377 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004378 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004379 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004380
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004381 x = PyInt_AsLong(v);
4382 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004383 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00004384 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004385 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004386 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004387 if (x < 0 && type == 'u') {
4388 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004389 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004390 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4391 sign = "-";
4392 else
4393 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004394 if (prec < 0)
4395 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004396
4397 if ((flags & F_ALT) &&
4398 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004399 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004400 * of issues that cause pain:
4401 * - when 0 is being converted, the C standard leaves off
4402 * the '0x' or '0X', which is inconsistent with other
4403 * %#x/%#X conversions and inconsistent with Python's
4404 * hex() function
4405 * - there are platforms that violate the standard and
4406 * convert 0 with the '0x' or '0X'
4407 * (Metrowerks, Compaq Tru64)
4408 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004409 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004410 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004411 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004412 * We can achieve the desired consistency by inserting our
4413 * own '0x' or '0X' prefix, and substituting %x/%X in place
4414 * of %#x/%#X.
4415 *
4416 * Note that this is the same approach as used in
4417 * formatint() in unicodeobject.c
4418 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004419 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4420 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004421 }
4422 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004423 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4424 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004425 prec, type);
4426 }
4427
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004428 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4429 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004430 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004431 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004432 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004433 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004434 return -1;
4435 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004436 if (sign[0])
4437 PyOS_snprintf(buf, buflen, fmt, -x);
4438 else
4439 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004440 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004441}
4442
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004443Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004444formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004445{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004446 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004447 if (PyString_Check(v)) {
4448 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004449 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004450 }
4451 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004452 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004453 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004454 }
4455 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004456 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004457}
4458
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004459/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4460
4461 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4462 chars are formatted. XXX This is a magic number. Each formatting
4463 routine does bounds checking to ensure no overflow, but a better
4464 solution may be to malloc a buffer of appropriate size for each
4465 format. For now, the current solution is sufficient.
4466*/
4467#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004468
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004469PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004470PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004471{
4472 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004473 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004474 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004475 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004476 PyObject *result, *orig_args;
4477#ifdef Py_USING_UNICODE
4478 PyObject *v, *w;
4479#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004480 PyObject *dict = NULL;
4481 if (format == NULL || !PyString_Check(format) || args == NULL) {
4482 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004483 return NULL;
4484 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004485 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004486 fmt = PyString_AS_STRING(format);
4487 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004488 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004489 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004490 if (result == NULL)
4491 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004492 res = PyString_AsString(result);
4493 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004494 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004495 argidx = 0;
4496 }
4497 else {
4498 arglen = -1;
4499 argidx = -2;
4500 }
Martin v. Löwis68192102007-07-21 06:55:02 +00004501 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004502 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004503 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004504 while (--fmtcnt >= 0) {
4505 if (*fmt != '%') {
4506 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004507 rescnt = fmtcnt + 100;
4508 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004509 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004510 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004511 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004512 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004513 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004514 }
4515 *res++ = *fmt++;
4516 }
4517 else {
4518 /* Got a format specifier */
4519 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004520 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004521 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004522 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004523 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004524 PyObject *v = NULL;
4525 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004526 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004527 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004528 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004529 char formatbuf[FORMATBUFLEN];
4530 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004531#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004532 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004533 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004534#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004535
Guido van Rossumda9c2711996-12-05 21:58:58 +00004536 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004537 if (*fmt == '(') {
4538 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004539 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004540 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004541 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004542
4543 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004544 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004545 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004546 goto error;
4547 }
4548 ++fmt;
4549 --fmtcnt;
4550 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004551 /* Skip over balanced parentheses */
4552 while (pcount > 0 && --fmtcnt >= 0) {
4553 if (*fmt == ')')
4554 --pcount;
4555 else if (*fmt == '(')
4556 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004557 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004558 }
4559 keylen = fmt - keystart - 1;
4560 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004561 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004562 "incomplete format key");
4563 goto error;
4564 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004565 key = PyString_FromStringAndSize(keystart,
4566 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004567 if (key == NULL)
4568 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004569 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004570 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004571 args_owned = 0;
4572 }
4573 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004574 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004575 if (args == NULL) {
4576 goto error;
4577 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004578 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004579 arglen = -1;
4580 argidx = -2;
4581 }
Guido van Rossume5372401993-03-16 12:15:04 +00004582 while (--fmtcnt >= 0) {
4583 switch (c = *fmt++) {
4584 case '-': flags |= F_LJUST; continue;
4585 case '+': flags |= F_SIGN; continue;
4586 case ' ': flags |= F_BLANK; continue;
4587 case '#': flags |= F_ALT; continue;
4588 case '0': flags |= F_ZERO; continue;
4589 }
4590 break;
4591 }
4592 if (c == '*') {
4593 v = getnextarg(args, arglen, &argidx);
4594 if (v == NULL)
4595 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004596 if (!PyInt_Check(v)) {
4597 PyErr_SetString(PyExc_TypeError,
4598 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004599 goto error;
4600 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004601 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004602 if (width < 0) {
4603 flags |= F_LJUST;
4604 width = -width;
4605 }
Guido van Rossume5372401993-03-16 12:15:04 +00004606 if (--fmtcnt >= 0)
4607 c = *fmt++;
4608 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004609 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004610 width = c - '0';
4611 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004612 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004613 if (!isdigit(c))
4614 break;
4615 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004616 PyErr_SetString(
4617 PyExc_ValueError,
4618 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004619 goto error;
4620 }
4621 width = width*10 + (c - '0');
4622 }
4623 }
4624 if (c == '.') {
4625 prec = 0;
4626 if (--fmtcnt >= 0)
4627 c = *fmt++;
4628 if (c == '*') {
4629 v = getnextarg(args, arglen, &argidx);
4630 if (v == NULL)
4631 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004632 if (!PyInt_Check(v)) {
4633 PyErr_SetString(
4634 PyExc_TypeError,
4635 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004636 goto error;
4637 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004638 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004639 if (prec < 0)
4640 prec = 0;
4641 if (--fmtcnt >= 0)
4642 c = *fmt++;
4643 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004644 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004645 prec = c - '0';
4646 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004647 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004648 if (!isdigit(c))
4649 break;
4650 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004651 PyErr_SetString(
4652 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004653 "prec too big");
4654 goto error;
4655 }
4656 prec = prec*10 + (c - '0');
4657 }
4658 }
4659 } /* prec */
4660 if (fmtcnt >= 0) {
4661 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004662 if (--fmtcnt >= 0)
4663 c = *fmt++;
4664 }
4665 }
4666 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004667 PyErr_SetString(PyExc_ValueError,
4668 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004669 goto error;
4670 }
4671 if (c != '%') {
4672 v = getnextarg(args, arglen, &argidx);
4673 if (v == NULL)
4674 goto error;
4675 }
4676 sign = 0;
4677 fill = ' ';
4678 switch (c) {
4679 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004680 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004681 len = 1;
4682 break;
4683 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004684#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004685 if (PyUnicode_Check(v)) {
4686 fmt = fmt_start;
4687 argidx = argidx_start;
4688 goto unicode;
4689 }
Georg Brandld45014b2005-10-01 17:06:00 +00004690#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004691 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004692#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004693 if (temp != NULL && PyUnicode_Check(temp)) {
4694 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004695 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004696 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004697 goto unicode;
4698 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004699#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004700 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004701 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004702 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004703 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004704 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004705 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004706 if (!PyString_Check(temp)) {
4707 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004708 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004709 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004710 goto error;
4711 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004712 pbuf = PyString_AS_STRING(temp);
4713 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004714 if (prec >= 0 && len > prec)
4715 len = prec;
4716 break;
4717 case 'i':
4718 case 'd':
4719 case 'u':
4720 case 'o':
4721 case 'x':
4722 case 'X':
4723 if (c == 'i')
4724 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004725 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004726 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004727 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004728 prec, c, &pbuf, &ilen);
4729 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004730 if (!temp)
4731 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004732 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004733 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004734 else {
4735 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004736 len = formatint(pbuf,
4737 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004738 flags, prec, c, v);
4739 if (len < 0)
4740 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004741 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004742 }
4743 if (flags & F_ZERO)
4744 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004745 break;
4746 case 'e':
4747 case 'E':
4748 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004749 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004750 case 'g':
4751 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004752 if (c == 'F')
4753 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004754 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004755 len = formatfloat(pbuf, sizeof(formatbuf),
4756 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004757 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004758 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004759 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004760 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004761 fill = '0';
4762 break;
4763 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004764#ifdef Py_USING_UNICODE
4765 if (PyUnicode_Check(v)) {
4766 fmt = fmt_start;
4767 argidx = argidx_start;
4768 goto unicode;
4769 }
4770#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004771 pbuf = formatbuf;
4772 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004773 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004774 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004775 break;
4776 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004777 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004778 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004779 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004780 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004781 (Py_ssize_t)(fmt - 1 -
4782 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004783 goto error;
4784 }
4785 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004786 if (*pbuf == '-' || *pbuf == '+') {
4787 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004788 len--;
4789 }
4790 else if (flags & F_SIGN)
4791 sign = '+';
4792 else if (flags & F_BLANK)
4793 sign = ' ';
4794 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004795 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004796 }
4797 if (width < len)
4798 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004799 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004800 reslen -= rescnt;
4801 rescnt = width + fmtcnt + 100;
4802 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004803 if (reslen < 0) {
4804 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004805 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004806 return PyErr_NoMemory();
4807 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004808 if (_PyString_Resize(&result, reslen) < 0) {
4809 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004810 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004811 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004812 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004813 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004814 }
4815 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004816 if (fill != ' ')
4817 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004818 rescnt--;
4819 if (width > len)
4820 width--;
4821 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004822 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4823 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004824 assert(pbuf[1] == c);
4825 if (fill != ' ') {
4826 *res++ = *pbuf++;
4827 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004828 }
Tim Petersfff53252001-04-12 18:38:48 +00004829 rescnt -= 2;
4830 width -= 2;
4831 if (width < 0)
4832 width = 0;
4833 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004834 }
4835 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004836 do {
4837 --rescnt;
4838 *res++ = fill;
4839 } while (--width > len);
4840 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004841 if (fill == ' ') {
4842 if (sign)
4843 *res++ = sign;
4844 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004845 (c == 'x' || c == 'X')) {
4846 assert(pbuf[0] == '0');
4847 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004848 *res++ = *pbuf++;
4849 *res++ = *pbuf++;
4850 }
4851 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004852 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004853 res += len;
4854 rescnt -= len;
4855 while (--width >= len) {
4856 --rescnt;
4857 *res++ = ' ';
4858 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004859 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004860 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004861 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004862 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004863 goto error;
4864 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004865 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004866 } /* '%' */
4867 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004868 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004869 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004870 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004871 goto error;
4872 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004873 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004874 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004875 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004876 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004877 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004878
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004879#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004880 unicode:
4881 if (args_owned) {
4882 Py_DECREF(args);
4883 args_owned = 0;
4884 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004885 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004886 if (PyTuple_Check(orig_args) && argidx > 0) {
4887 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004888 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004889 v = PyTuple_New(n);
4890 if (v == NULL)
4891 goto error;
4892 while (--n >= 0) {
4893 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4894 Py_INCREF(w);
4895 PyTuple_SET_ITEM(v, n, w);
4896 }
4897 args = v;
4898 } else {
4899 Py_INCREF(orig_args);
4900 args = orig_args;
4901 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004902 args_owned = 1;
4903 /* Take what we have of the result and let the Unicode formatting
4904 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004905 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004906 if (_PyString_Resize(&result, rescnt))
4907 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004908 fmtcnt = PyString_GET_SIZE(format) - \
4909 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004910 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4911 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004912 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004913 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004914 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004915 if (v == NULL)
4916 goto error;
4917 /* Paste what we have (result) to what the Unicode formatting
4918 function returned (v) and return the result (or error) */
4919 w = PyUnicode_Concat(result, v);
4920 Py_DECREF(result);
4921 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004922 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004923 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004924#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004925
Guido van Rossume5372401993-03-16 12:15:04 +00004926 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004927 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004928 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004929 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004930 }
Guido van Rossume5372401993-03-16 12:15:04 +00004931 return NULL;
4932}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004933
Guido van Rossum2a61e741997-01-18 07:55:05 +00004934void
Fred Drakeba096332000-07-09 07:04:36 +00004935PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004936{
4937 register PyStringObject *s = (PyStringObject *)(*p);
4938 PyObject *t;
4939 if (s == NULL || !PyString_Check(s))
4940 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004941 /* If it's a string subclass, we don't really know what putting
4942 it in the interned dict might do. */
4943 if (!PyString_CheckExact(s))
4944 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004945 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004946 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004947 if (interned == NULL) {
4948 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004949 if (interned == NULL) {
4950 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004951 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004952 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004953 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004954 t = PyDict_GetItem(interned, (PyObject *)s);
4955 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004956 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004957 Py_DECREF(*p);
4958 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004959 return;
4960 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004961
Armin Rigo79f7ad22004-08-07 19:27:39 +00004962 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004963 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004964 return;
4965 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004966 /* The two references in interned are not counted by refcnt.
4967 The string deallocator will take care of this */
Martin v. Löwis68192102007-07-21 06:55:02 +00004968 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004969 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004970}
4971
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004972void
4973PyString_InternImmortal(PyObject **p)
4974{
4975 PyString_InternInPlace(p);
4976 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4977 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4978 Py_INCREF(*p);
4979 }
4980}
4981
Guido van Rossum2a61e741997-01-18 07:55:05 +00004982
4983PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004984PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004985{
4986 PyObject *s = PyString_FromString(cp);
4987 if (s == NULL)
4988 return NULL;
4989 PyString_InternInPlace(&s);
4990 return s;
4991}
4992
Guido van Rossum8cf04761997-08-02 02:57:45 +00004993void
Fred Drakeba096332000-07-09 07:04:36 +00004994PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004995{
4996 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004997 for (i = 0; i < UCHAR_MAX + 1; i++) {
4998 Py_XDECREF(characters[i]);
4999 characters[i] = NULL;
5000 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005001 Py_XDECREF(nullstring);
5002 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005003}
Barry Warsawa903ad982001-02-23 16:40:48 +00005004
Barry Warsawa903ad982001-02-23 16:40:48 +00005005void _Py_ReleaseInternedStrings(void)
5006{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005007 PyObject *keys;
5008 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005009 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005010 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005011
5012 if (interned == NULL || !PyDict_Check(interned))
5013 return;
5014 keys = PyDict_Keys(interned);
5015 if (keys == NULL || !PyList_Check(keys)) {
5016 PyErr_Clear();
5017 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005018 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005019
5020 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5021 detector, interned strings are not forcibly deallocated; rather, we
5022 give them their stolen references back, and then clear and DECREF
5023 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005024
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005025 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005026 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5027 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005028 for (i = 0; i < n; i++) {
5029 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5030 switch (s->ob_sstate) {
5031 case SSTATE_NOT_INTERNED:
5032 /* XXX Shouldn't happen */
5033 break;
5034 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005035 Py_Refcnt(s) += 1;
5036 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005037 break;
5038 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005039 Py_Refcnt(s) += 2;
5040 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005041 break;
5042 default:
5043 Py_FatalError("Inconsistent interned string state.");
5044 }
5045 s->ob_sstate = SSTATE_NOT_INTERNED;
5046 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005047 fprintf(stderr, "total size of all interned strings: "
5048 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5049 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005050 Py_DECREF(keys);
5051 PyDict_Clear(interned);
5052 Py_DECREF(interned);
5053 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005054}