blob: 4dd64f8429aec3fc95ab618b16a30d9abbbe1e6c [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000424 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000504 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
Martin v. Löwis68192102007-07-21 06:55:02 +0000524 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000536 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Martin v. Löwis68192102007-07-21 06:55:02 +0000720 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
Martin v. Löwis68192102007-07-21 06:55:02 +0000753 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000770/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
Fredrik Lundha50d2012006-05-26 17:04:58 +0000772#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000773
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000774#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000778
Fredrik Lundhb9479482006-05-26 17:22:38 +0000779#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000807 char *data = op->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +0000808 Py_ssize_t size = Py_Size(op);
Armin Rigo7ccbca92006-10-04 12:17:45 +0000809 while (size > INT_MAX) {
810 /* Very long strings cannot be written atomically.
811 * But don't write exactly INT_MAX bytes at a time
812 * to avoid memory aligment issues.
813 */
814 const int chunk_size = INT_MAX & ~0x3FFF;
815 fwrite(data, 1, chunk_size, fp);
816 data += chunk_size;
817 size -= chunk_size;
818 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000819#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000820 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000821#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000822 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000823#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000824 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826
Thomas Wouters7e474022000-07-16 12:04:32 +0000827 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828 quote = '\'';
Martin v. Löwis68192102007-07-21 06:55:02 +0000829 if (memchr(op->ob_sval, '\'', Py_Size(op)) &&
830 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 quote = '"';
832
833 fputc(quote, fp);
Martin v. Löwis68192102007-07-21 06:55:02 +0000834 for (i = 0; i < Py_Size(op); i++) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000837 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000840 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000841 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\r");
844 else if (c < ' ' || c >= 0x7f)
845 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000850 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851}
852
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000853PyObject *
854PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000856 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis68192102007-07-21 06:55:02 +0000857 size_t newsize = 2 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000858 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +0000859 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000860 PyErr_SetString(PyExc_OverflowError,
861 "string is too large to make repr");
862 }
863 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000864 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000865 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866 }
867 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000868 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 register char c;
870 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000871 int quote;
872
Thomas Wouters7e474022000-07-16 12:04:32 +0000873 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000874 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000875 if (smartquotes &&
Martin v. Löwis68192102007-07-21 06:55:02 +0000876 memchr(op->ob_sval, '\'', Py_Size(op)) &&
877 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000878 quote = '"';
879
Tim Peters9161c8b2001-12-03 01:55:38 +0000880 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 *p++ = quote;
Martin v. Löwis68192102007-07-21 06:55:02 +0000882 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000883 /* There's at least enough room for a hex escape
884 and a closing quote. */
885 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000889 else if (c == '\t')
890 *p++ = '\\', *p++ = 't';
891 else if (c == '\n')
892 *p++ = '\\', *p++ = 'n';
893 else if (c == '\r')
894 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000895 else if (c < ' ' || c >= 0x7f) {
896 /* For performance, we don't want to call
897 PyOS_snprintf here (extra layers of
898 function call). */
899 sprintf(p, "\\x%02x", c & 0xff);
900 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000901 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000902 else
903 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000904 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000905 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000906 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000909 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000910 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912}
913
Guido van Rossum189f1df2001-05-01 16:51:53 +0000914static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000915string_repr(PyObject *op)
916{
917 return PyString_Repr(op, 1);
918}
919
920static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921string_str(PyObject *s)
922{
Tim Petersc9933152001-10-16 20:18:24 +0000923 assert(PyString_Check(s));
924 if (PyString_CheckExact(s)) {
925 Py_INCREF(s);
926 return s;
927 }
928 else {
929 /* Subtype -- return genuine string with the same value. */
930 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis68192102007-07-21 06:55:02 +0000931 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000932 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000933}
934
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000936string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937{
Martin v. Löwis68192102007-07-21 06:55:02 +0000938 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939}
940
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000942string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943{
Andrew Dalke598710c2006-05-25 18:18:39 +0000944 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 register PyStringObject *op;
946 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000947#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 if (PyUnicode_Check(bb))
949 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000950#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000951 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000952 "cannot concatenate 'str' and '%.200s' objects",
Martin v. Löwis68192102007-07-21 06:55:02 +0000953 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 return NULL;
955 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000956#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000957 /* Optimize cases with empty left or right operand */
Martin v. Löwis68192102007-07-21 06:55:02 +0000958 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000959 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis68192102007-07-21 06:55:02 +0000960 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000961 Py_INCREF(bb);
962 return bb;
963 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 Py_INCREF(a);
965 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000967 size = Py_Size(a) + Py_Size(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000968 if (size < 0) {
969 PyErr_SetString(PyExc_OverflowError,
970 "strings are too large to concat");
971 return NULL;
972 }
973
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000974 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000975 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000976 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000978 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000979 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000980 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis68192102007-07-21 06:55:02 +0000981 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
982 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000983 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985#undef b
986}
987
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000989string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000991 register Py_ssize_t i;
992 register Py_ssize_t j;
993 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000995 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 if (n < 0)
997 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000998 /* watch out for overflows: the size can overflow int,
999 * and the # of bytes needed can overflow size_t
1000 */
Martin v. Löwis68192102007-07-21 06:55:02 +00001001 size = Py_Size(a) * n;
1002 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001003 PyErr_SetString(PyExc_OverflowError,
1004 "repeated string is too long");
1005 return NULL;
1006 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001007 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 Py_INCREF(a);
1009 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010 }
Tim Peterse7c05322004-06-27 17:24:49 +00001011 nbytes = (size_t)size;
1012 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001013 PyErr_SetString(PyExc_OverflowError,
1014 "repeated string is too long");
1015 return NULL;
1016 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001017 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001018 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001019 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001021 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001022 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001023 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001024 op->ob_sval[size] = '\0';
Martin v. Löwis68192102007-07-21 06:55:02 +00001025 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001026 memset(op->ob_sval, a->ob_sval[0] , n);
1027 return (PyObject *) op;
1028 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001029 i = 0;
1030 if (i < size) {
Martin v. Löwis68192102007-07-21 06:55:02 +00001031 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1032 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001033 }
1034 while (i < size) {
1035 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001036 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001037 i += j;
1038 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001039 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040}
1041
1042/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1043
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001044static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001045string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001046 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001047 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048{
1049 if (i < 0)
1050 i = 0;
1051 if (j < 0)
1052 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis68192102007-07-21 06:55:02 +00001053 if (j > Py_Size(a))
1054 j = Py_Size(a);
1055 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001056 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001057 Py_INCREF(a);
1058 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059 }
1060 if (j < i)
1061 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001062 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063}
1064
Guido van Rossum9284a572000-03-07 15:53:43 +00001065static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001066string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001067{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001068 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001069#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001070 if (PyUnicode_Check(sub_obj))
1071 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001072#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001073 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001074 PyErr_Format(PyExc_TypeError,
1075 "'in <string>' requires string as left operand, "
Martin v. Löwis68192102007-07-21 06:55:02 +00001076 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001077 return -1;
1078 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001079 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001080
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001082}
1083
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001084static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001085string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001086{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001087 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001088 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +00001089 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091 return NULL;
1092 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001093 pchar = a->ob_sval[i];
1094 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001095 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001096 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001097 else {
1098#ifdef COUNT_ALLOCS
1099 one_strings++;
1100#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001101 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001102 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001103 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001104}
1105
Martin v. Löwiscd353062001-05-24 16:56:35 +00001106static PyObject*
1107string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001108{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001109 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001110 Py_ssize_t len_a, len_b;
1111 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001112 PyObject *result;
1113
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001114 /* Make sure both arguments are strings. */
1115 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001116 result = Py_NotImplemented;
1117 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001118 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001119 if (a == b) {
1120 switch (op) {
1121 case Py_EQ:case Py_LE:case Py_GE:
1122 result = Py_True;
1123 goto out;
1124 case Py_NE:case Py_LT:case Py_GT:
1125 result = Py_False;
1126 goto out;
1127 }
1128 }
1129 if (op == Py_EQ) {
1130 /* Supporting Py_NE here as well does not save
1131 much time, since Py_NE is rarely used. */
Martin v. Löwis68192102007-07-21 06:55:02 +00001132 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001133 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis68192102007-07-21 06:55:02 +00001134 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001135 result = Py_True;
1136 } else {
1137 result = Py_False;
1138 }
1139 goto out;
1140 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001141 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001142 min_len = (len_a < len_b) ? len_a : len_b;
1143 if (min_len > 0) {
1144 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1145 if (c==0)
1146 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001147 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001148 c = 0;
1149 if (c == 0)
1150 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1151 switch (op) {
1152 case Py_LT: c = c < 0; break;
1153 case Py_LE: c = c <= 0; break;
1154 case Py_EQ: assert(0); break; /* unreachable */
1155 case Py_NE: c = c != 0; break;
1156 case Py_GT: c = c > 0; break;
1157 case Py_GE: c = c >= 0; break;
1158 default:
1159 result = Py_NotImplemented;
1160 goto out;
1161 }
1162 result = c ? Py_True : Py_False;
1163 out:
1164 Py_INCREF(result);
1165 return result;
1166}
1167
1168int
1169_PyString_Eq(PyObject *o1, PyObject *o2)
1170{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001171 PyStringObject *a = (PyStringObject*) o1;
1172 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis68192102007-07-21 06:55:02 +00001173 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001174 && *a->ob_sval == *b->ob_sval
Martin v. Löwis68192102007-07-21 06:55:02 +00001175 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001176}
1177
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178static long
Fred Drakeba096332000-07-09 07:04:36 +00001179string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001181 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001182 register unsigned char *p;
1183 register long x;
1184
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001185 if (a->ob_shash != -1)
1186 return a->ob_shash;
Martin v. Löwis68192102007-07-21 06:55:02 +00001187 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001188 p = (unsigned char *) a->ob_sval;
1189 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001190 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001191 x = (1000003*x) ^ *p++;
Martin v. Löwis68192102007-07-21 06:55:02 +00001192 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001193 if (x == -1)
1194 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001195 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001196 return x;
1197}
1198
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001199static PyObject*
1200string_subscript(PyStringObject* self, PyObject* item)
1201{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001202 if (PyIndex_Check(item)) {
1203 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204 if (i == -1 && PyErr_Occurred())
1205 return NULL;
1206 if (i < 0)
1207 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001208 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 }
1210 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001211 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001212 char* source_buf;
1213 char* result_buf;
1214 PyObject* result;
1215
Tim Petersae1d0c92006-03-17 03:29:34 +00001216 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217 PyString_GET_SIZE(self),
1218 &start, &stop, &step, &slicelength) < 0) {
1219 return NULL;
1220 }
1221
1222 if (slicelength <= 0) {
1223 return PyString_FromStringAndSize("", 0);
1224 }
1225 else {
1226 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001227 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001228 if (result_buf == NULL)
1229 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230
Tim Petersae1d0c92006-03-17 03:29:34 +00001231 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232 cur += step, i++) {
1233 result_buf[i] = source_buf[cur];
1234 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001235
1236 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001237 slicelength);
1238 PyMem_Free(result_buf);
1239 return result;
1240 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001241 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001242 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001243 PyErr_Format(PyExc_TypeError,
1244 "string indices must be integers, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00001245 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001246 return NULL;
1247 }
1248}
1249
Martin v. Löwis18e16552006-02-15 17:27:45 +00001250static Py_ssize_t
1251string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001252{
1253 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001254 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001255 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001256 return -1;
1257 }
1258 *ptr = (void *)self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001259 return Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001260}
1261
Martin v. Löwis18e16552006-02-15 17:27:45 +00001262static Py_ssize_t
1263string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001264{
Guido van Rossum045e6881997-09-08 18:30:11 +00001265 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001266 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001267 return -1;
1268}
1269
Martin v. Löwis18e16552006-02-15 17:27:45 +00001270static Py_ssize_t
1271string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001272{
1273 if ( lenp )
Martin v. Löwis68192102007-07-21 06:55:02 +00001274 *lenp = Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001275 return 1;
1276}
1277
Martin v. Löwis18e16552006-02-15 17:27:45 +00001278static Py_ssize_t
1279string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001280{
1281 if ( index != 0 ) {
1282 PyErr_SetString(PyExc_SystemError,
1283 "accessing non-existent string segment");
1284 return -1;
1285 }
1286 *ptr = self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001287 return Py_Size(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001288}
1289
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001290static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001291 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001292 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (ssizeargfunc)string_repeat, /*sq_repeat*/
1294 (ssizeargfunc)string_item, /*sq_item*/
1295 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001296 0, /*sq_ass_item*/
1297 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001298 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001299};
1300
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001301static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001302 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001303 (binaryfunc)string_subscript,
1304 0,
1305};
1306
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001307static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001308 (readbufferproc)string_buffer_getreadbuf,
1309 (writebufferproc)string_buffer_getwritebuf,
1310 (segcountproc)string_buffer_getsegcount,
1311 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001312};
1313
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001314
1315
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: the format string with its first three
   characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001324
Andrew Dalke525eab32006-05-26 14:00:45 +00001325
/* True if `length` bytes of `pattern` occur in `target` at `offset`.
   The first and last bytes are compared directly and memcmp() covers the
   bytes in between (length-2 of them).
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)    \
    (target[offset] == pattern[0] &&                        \
     target[offset+length-1] == pattern[length-1] &&        \
     !memcmp(target+offset+1, pattern+1, length-2) )
1331
1332
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a given split limit, capped at MAX_PREALLOC.
   5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)

/* The two macros below expect the enclosing function to declare the
   locals `str` and `list` and to provide an `onError` label; SPLIT_ADD
   additionally uses a `count` local. */

/* Append data[left:right] to `list` as a new string. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as element number `count` of `list`: directly
   into the slot while count < MAX_PREALLOC, by appending afterwards.
   Increments `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Advance (or, for the R* variants, retreat) index `i` over a run of
   whitespace / non-whitespace bytes in `s`, staying inside the string. */
#define SKIP_SPACE(s, i, len)    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1381#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1382
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001383Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001384split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385{
Andrew Dalke525eab32006-05-26 14:00:45 +00001386 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001387 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001388 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389
1390 if (list == NULL)
1391 return NULL;
1392
Andrew Dalke02758d62006-05-26 15:21:01 +00001393 i = j = 0;
1394
1395 while (maxsplit-- > 0) {
1396 SKIP_SPACE(s, i, len);
1397 if (i==len) break;
1398 j = i; i++;
1399 SKIP_NONSPACE(s, i, len);
1400 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001402
1403 if (i < len) {
1404 /* Only occurs when maxsplit was reached */
1405 /* Skip any remaining whitespace and copy to end of string */
1406 SKIP_SPACE(s, i, len);
1407 if (i != len)
1408 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001409 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001410 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001412 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413 Py_DECREF(list);
1414 return NULL;
1415}
1416
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001417Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001418split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001419{
Andrew Dalke525eab32006-05-26 14:00:45 +00001420 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001421 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001422 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001423
1424 if (list == NULL)
1425 return NULL;
1426
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001427 i = j = 0;
1428 while ((j < len) && (maxcount-- > 0)) {
1429 for(; j<len; j++) {
1430 /* I found that using memchr makes no difference */
1431 if (s[j] == ch) {
1432 SPLIT_ADD(s, i, j);
1433 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001434 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001435 }
1436 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001437 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001438 if (i <= len) {
1439 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001440 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001441 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 return list;
1443
1444 onError:
1445 Py_DECREF(list);
1446 return NULL;
1447}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001448
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001449PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450"S.split([sep [,maxsplit]]) -> list of strings\n\
1451\n\
1452Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001453delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001454splits are done. If sep is not specified or is None, any\n\
1455whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456
1457static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001458string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001460 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001461 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001462 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001463 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001464#ifdef USE_FAST
1465 Py_ssize_t pos;
1466#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001467
Martin v. Löwis9c830762006-04-13 08:37:17 +00001468 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001470 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001471 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001473 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001474 if (PyString_Check(subobj)) {
1475 sub = PyString_AS_STRING(subobj);
1476 n = PyString_GET_SIZE(subobj);
1477 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001478#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001479 else if (PyUnicode_Check(subobj))
1480 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001481#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001482 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1483 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001484
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001485 if (n == 0) {
1486 PyErr_SetString(PyExc_ValueError, "empty separator");
1487 return NULL;
1488 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001489 else if (n == 1)
1490 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491
Andrew Dalke525eab32006-05-26 14:00:45 +00001492 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493 if (list == NULL)
1494 return NULL;
1495
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001496#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001497 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001498 while (maxsplit-- > 0) {
1499 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1500 if (pos < 0)
1501 break;
1502 j = i+pos;
1503 SPLIT_ADD(s, i, j);
1504 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001505 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001506#else
1507 i = j = 0;
1508 while ((j+n <= len) && (maxsplit-- > 0)) {
1509 for (; j+n <= len; j++) {
1510 if (Py_STRING_MATCH(s, j, sub, n)) {
1511 SPLIT_ADD(s, i, j);
1512 i = j = j + n;
1513 break;
1514 }
1515 }
1516 }
1517#endif
1518 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001519 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520 return list;
1521
Andrew Dalke525eab32006-05-26 14:00:45 +00001522 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001523 Py_DECREF(list);
1524 return NULL;
1525}
1526
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001527PyDoc_STRVAR(partition__doc__,
1528"S.partition(sep) -> (head, sep, tail)\n\
1529\n\
1530Searches for the separator sep in S, and returns the part before it,\n\
1531the separator itself, and the part after it. If the separator is not\n\
1532found, returns S and two empty strings.");
1533
1534static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001535string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001536{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001537 const char *sep;
1538 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001539
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001540 if (PyString_Check(sep_obj)) {
1541 sep = PyString_AS_STRING(sep_obj);
1542 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001543 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001544#ifdef Py_USING_UNICODE
1545 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001546 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001547#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001548 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549 return NULL;
1550
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001551 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001552 (PyObject*) self,
1553 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1554 sep_obj, sep, sep_len
1555 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001556}
1557
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001558PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001559"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001560\n\
1561Searches for the separator sep in S, starting at the end of S, and returns\n\
1562the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001563separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001564
1565static PyObject *
1566string_rpartition(PyStringObject *self, PyObject *sep_obj)
1567{
1568 const char *sep;
1569 Py_ssize_t sep_len;
1570
1571 if (PyString_Check(sep_obj)) {
1572 sep = PyString_AS_STRING(sep_obj);
1573 sep_len = PyString_GET_SIZE(sep_obj);
1574 }
1575#ifdef Py_USING_UNICODE
1576 else if (PyUnicode_Check(sep_obj))
1577 return PyUnicode_Partition((PyObject *) self, sep_obj);
1578#endif
1579 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1580 return NULL;
1581
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001582 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001583 (PyObject*) self,
1584 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1585 sep_obj, sep, sep_len
1586 );
1587}
1588
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001589Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001590rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001591{
Andrew Dalke525eab32006-05-26 14:00:45 +00001592 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001593 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001594 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001595
1596 if (list == NULL)
1597 return NULL;
1598
Andrew Dalke02758d62006-05-26 15:21:01 +00001599 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001600
Andrew Dalke02758d62006-05-26 15:21:01 +00001601 while (maxsplit-- > 0) {
1602 RSKIP_SPACE(s, i);
1603 if (i<0) break;
1604 j = i; i--;
1605 RSKIP_NONSPACE(s, i);
1606 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001607 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001608 if (i >= 0) {
1609 /* Only occurs when maxsplit was reached */
1610 /* Skip any remaining whitespace and copy to beginning of string */
1611 RSKIP_SPACE(s, i);
1612 if (i >= 0)
1613 SPLIT_ADD(s, 0, i + 1);
1614
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001615 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001616 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001617 if (PyList_Reverse(list) < 0)
1618 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001619 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001620 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 Py_DECREF(list);
1622 return NULL;
1623}
1624
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001625Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001626rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001627{
Andrew Dalke525eab32006-05-26 14:00:45 +00001628 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001630 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631
1632 if (list == NULL)
1633 return NULL;
1634
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001635 i = j = len - 1;
1636 while ((i >= 0) && (maxcount-- > 0)) {
1637 for (; i >= 0; i--) {
1638 if (s[i] == ch) {
1639 SPLIT_ADD(s, i + 1, j + 1);
1640 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001641 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001642 }
1643 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001644 }
1645 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001646 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001647 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001648 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001649 if (PyList_Reverse(list) < 0)
1650 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001651 return list;
1652
1653 onError:
1654 Py_DECREF(list);
1655 return NULL;
1656}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001657
1658PyDoc_STRVAR(rsplit__doc__,
1659"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1660\n\
1661Return a list of the words in the string S, using sep as the\n\
1662delimiter string, starting at the end of the string and working\n\
1663to the front. If maxsplit is given, at most maxsplit splits are\n\
1664done. If sep is not specified or is None, any whitespace string\n\
1665is a separator.");
1666
1667static PyObject *
1668string_rsplit(PyStringObject *self, PyObject *args)
1669{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001670 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001671 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001672 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001673 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674
Martin v. Löwis9c830762006-04-13 08:37:17 +00001675 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676 return NULL;
1677 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001678 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679 if (subobj == Py_None)
1680 return rsplit_whitespace(s, len, maxsplit);
1681 if (PyString_Check(subobj)) {
1682 sub = PyString_AS_STRING(subobj);
1683 n = PyString_GET_SIZE(subobj);
1684 }
1685#ifdef Py_USING_UNICODE
1686 else if (PyUnicode_Check(subobj))
1687 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1688#endif
1689 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1690 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001691
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001692 if (n == 0) {
1693 PyErr_SetString(PyExc_ValueError, "empty separator");
1694 return NULL;
1695 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001696 else if (n == 1)
1697 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001698
Andrew Dalke525eab32006-05-26 14:00:45 +00001699 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700 if (list == NULL)
1701 return NULL;
1702
1703 j = len;
1704 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001705
1706 while ( (i >= 0) && (maxsplit-- > 0) ) {
1707 for (; i>=0; i--) {
1708 if (Py_STRING_MATCH(s, i, sub, n)) {
1709 SPLIT_ADD(s, i + n, j);
1710 j = i;
1711 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001712 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001713 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001715 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001716 SPLIT_ADD(s, 0, j);
1717 FIX_PREALLOC_SIZE(list);
1718 if (PyList_Reverse(list) < 0)
1719 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001720 return list;
1721
Andrew Dalke525eab32006-05-26 14:00:45 +00001722onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001723 Py_DECREF(list);
1724 return NULL;
1725}
1726
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001728PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729"S.join(sequence) -> string\n\
1730\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001732sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733
1734static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001735string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736{
1737 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001738 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001739 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001741 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001742 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001744 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745
Tim Peters19fe14e2001-01-19 03:03:47 +00001746 seq = PySequence_Fast(orig, "");
1747 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001748 return NULL;
1749 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001750
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001751 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001752 if (seqlen == 0) {
1753 Py_DECREF(seq);
1754 return PyString_FromString("");
1755 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001757 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001758 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1759 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001760 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001761 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001764
Raymond Hettinger674f2412004-08-23 23:23:54 +00001765 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001766 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001767 * Do a pre-pass to figure out the total amount of space we'll
1768 * need (sz), see whether any argument is absurd, and defer to
1769 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001770 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001771 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001772 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 item = PySequence_Fast_GET_ITEM(seq, i);
1774 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001775#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001776 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001777 /* Defer to Unicode join.
1778 * CAUTION: There's no gurantee that the
1779 * original sequence can be iterated over
1780 * again, so we must pass seq here.
1781 */
1782 PyObject *result;
1783 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001784 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001785 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001786 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001787#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001788 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001789 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001790 " %.80s found",
Martin v. Löwis68192102007-07-21 06:55:02 +00001791 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001792 Py_DECREF(seq);
1793 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001794 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001795 sz += PyString_GET_SIZE(item);
1796 if (i != 0)
1797 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001798 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001799 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001800 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001801 Py_DECREF(seq);
1802 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001804 }
1805
1806 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001807 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001808 if (res == NULL) {
1809 Py_DECREF(seq);
1810 return NULL;
1811 }
1812
1813 /* Catenate everything. */
1814 p = PyString_AS_STRING(res);
1815 for (i = 0; i < seqlen; ++i) {
1816 size_t n;
1817 item = PySequence_Fast_GET_ITEM(seq, i);
1818 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001819 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001820 p += n;
1821 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001822 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001823 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001824 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001826
Jeremy Hylton49048292000-07-11 03:28:17 +00001827 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829}
1830
Tim Peters52e155e2001-06-16 05:42:57 +00001831PyObject *
1832_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001833{
Tim Petersa7259592001-06-16 05:11:17 +00001834 assert(sep != NULL && PyString_Check(sep));
1835 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001836 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001837}
1838
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001839Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001840string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001841{
1842 if (*end > len)
1843 *end = len;
1844 else if (*end < 0)
1845 *end += len;
1846 if (*end < 0)
1847 *end = 0;
1848 if (*start < 0)
1849 *start += len;
1850 if (*start < 0)
1851 *start = 0;
1852}
1853
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001854Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001855string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001857 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001858 const char *sub;
1859 Py_ssize_t sub_len;
1860 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001862 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1863 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001864 return -2;
1865 if (PyString_Check(subobj)) {
1866 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001867 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001869#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001871 return PyUnicode_Find(
1872 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001873#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001874 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001875 /* XXX - the "expected a character buffer object" is pretty
1876 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877 return -2;
1878
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001879 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001880 return stringlib_find_slice(
1881 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1882 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001883 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001884 return stringlib_rfind_slice(
1885 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1886 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001887}
1888
1889
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001890PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891"S.find(sub [,start [,end]]) -> int\n\
1892\n\
1893Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001894such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895arguments start and end are interpreted as in slice notation.\n\
1896\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001897Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898
1899static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001900string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001902 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903 if (result == -2)
1904 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001905 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906}
1907
1908
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001909PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910"S.index(sub [,start [,end]]) -> int\n\
1911\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001912Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913
1914static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001915string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001917 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918 if (result == -2)
1919 return NULL;
1920 if (result == -1) {
1921 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001922 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923 return NULL;
1924 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001925 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926}
1927
1928
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001929PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930"S.rfind(sub [,start [,end]]) -> int\n\
1931\n\
1932Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001933such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934arguments start and end are interpreted as in slice notation.\n\
1935\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001936Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937
1938static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001939string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001941 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942 if (result == -2)
1943 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001944 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949"S.rindex(sub [,start [,end]]) -> int\n\
1950\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001951Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952
1953static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001954string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001956 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957 if (result == -2)
1958 return NULL;
1959 if (result == -1) {
1960 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001961 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962 return NULL;
1963 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001964 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965}
1966
1967
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001968Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1970{
1971 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001972 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001973 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1975 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001976
1977 i = 0;
1978 if (striptype != RIGHTSTRIP) {
1979 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1980 i++;
1981 }
1982 }
1983
1984 j = len;
1985 if (striptype != LEFTSTRIP) {
1986 do {
1987 j--;
1988 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1989 j++;
1990 }
1991
1992 if (i == 0 && j == len && PyString_CheckExact(self)) {
1993 Py_INCREF(self);
1994 return (PyObject*)self;
1995 }
1996 else
1997 return PyString_FromStringAndSize(s+i, j-i);
1998}
1999
2000
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002001Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002002do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003{
2004 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002005 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007 i = 0;
2008 if (striptype != RIGHTSTRIP) {
2009 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2010 i++;
2011 }
2012 }
2013
2014 j = len;
2015 if (striptype != LEFTSTRIP) {
2016 do {
2017 j--;
2018 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2019 j++;
2020 }
2021
Tim Peters8fa5dd02001-09-12 02:18:30 +00002022 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023 Py_INCREF(self);
2024 return (PyObject*)self;
2025 }
2026 else
2027 return PyString_FromStringAndSize(s+i, j-i);
2028}
2029
2030
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002031Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002032do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2033{
2034 PyObject *sep = NULL;
2035
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002036 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002037 return NULL;
2038
2039 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002040 if (PyString_Check(sep))
2041 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002042#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002043 else if (PyUnicode_Check(sep)) {
2044 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2045 PyObject *res;
2046 if (uniself==NULL)
2047 return NULL;
2048 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2049 striptype, sep);
2050 Py_DECREF(uniself);
2051 return res;
2052 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002053#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002054 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002055#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002056 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002057#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002058 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002059#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002060 STRIPNAME(striptype));
2061 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002062 }
2063
2064 return do_strip(self, striptype);
2065}
2066
2067
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002068PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002069"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070\n\
2071Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002072whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002073If chars is given and not None, remove characters in chars instead.\n\
2074If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075
2076static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002077string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079 if (PyTuple_GET_SIZE(args) == 0)
2080 return do_strip(self, BOTHSTRIP); /* Common case */
2081 else
2082 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083}
2084
2085
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002086PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002087"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002089Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002090If chars is given and not None, remove characters in chars instead.\n\
2091If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092
2093static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002094string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096 if (PyTuple_GET_SIZE(args) == 0)
2097 return do_strip(self, LEFTSTRIP); /* Common case */
2098 else
2099 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100}
2101
2102
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002103PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002104"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002106Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002107If chars is given and not None, remove characters in chars instead.\n\
2108If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109
2110static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002111string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002113 if (PyTuple_GET_SIZE(args) == 0)
2114 return do_strip(self, RIGHTSTRIP); /* Common case */
2115 else
2116 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117}
2118
2119
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002120PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121"S.lower() -> string\n\
2122\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002123Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002125/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2126#ifndef _tolower
2127#define _tolower tolower
2128#endif
2129
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002131string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002133 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002134 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002135 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002137 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002138 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002140
2141 s = PyString_AS_STRING(newobj);
2142
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002143 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002144
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002146 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002147 if (isupper(c))
2148 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002150
Anthony Baxtera6286212006-04-11 07:42:36 +00002151 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152}
2153
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002154PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155"S.upper() -> string\n\
2156\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002157Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002159#ifndef _toupper
2160#define _toupper toupper
2161#endif
2162
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002164string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002166 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002167 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002168 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002170 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002171 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002173
2174 s = PyString_AS_STRING(newobj);
2175
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002176 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002177
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002179 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002180 if (islower(c))
2181 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002183
Anthony Baxtera6286212006-04-11 07:42:36 +00002184 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185}
2186
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002187PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188"S.title() -> string\n\
2189\n\
2190Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002191characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002192
2193static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002194string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002195{
2196 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002197 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002198 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002199 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200
Anthony Baxtera6286212006-04-11 07:42:36 +00002201 newobj = PyString_FromStringAndSize(NULL, n);
2202 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002204 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002205 for (i = 0; i < n; i++) {
2206 int c = Py_CHARMASK(*s++);
2207 if (islower(c)) {
2208 if (!previous_is_cased)
2209 c = toupper(c);
2210 previous_is_cased = 1;
2211 } else if (isupper(c)) {
2212 if (previous_is_cased)
2213 c = tolower(c);
2214 previous_is_cased = 1;
2215 } else
2216 previous_is_cased = 0;
2217 *s_new++ = c;
2218 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002219 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002220}
2221
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002222PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223"S.capitalize() -> string\n\
2224\n\
2225Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002226capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227
2228static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002229string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230{
2231 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002232 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002233 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234
Anthony Baxtera6286212006-04-11 07:42:36 +00002235 newobj = PyString_FromStringAndSize(NULL, n);
2236 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002238 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 if (0 < n) {
2240 int c = Py_CHARMASK(*s++);
2241 if (islower(c))
2242 *s_new = toupper(c);
2243 else
2244 *s_new = c;
2245 s_new++;
2246 }
2247 for (i = 1; i < n; i++) {
2248 int c = Py_CHARMASK(*s++);
2249 if (isupper(c))
2250 *s_new = tolower(c);
2251 else
2252 *s_new = c;
2253 s_new++;
2254 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002255 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256}
2257
2258
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002259PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260"S.count(sub[, start[, end]]) -> int\n\
2261\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002262Return the number of non-overlapping occurrences of substring sub in\n\
2263string S[start:end]. Optional arguments start and end are interpreted\n\
2264as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265
2266static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002267string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002269 PyObject *sub_obj;
2270 const char *str = PyString_AS_STRING(self), *sub;
2271 Py_ssize_t sub_len;
2272 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002273
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002274 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2275 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002276 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002277
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002278 if (PyString_Check(sub_obj)) {
2279 sub = PyString_AS_STRING(sub_obj);
2280 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002282#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002283 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002284 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002285 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002286 if (count == -1)
2287 return NULL;
2288 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002289 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002290 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002291#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002292 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 return NULL;
2294
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002295 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002296
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002297 return PyInt_FromSsize_t(
2298 stringlib_count(str + start, end - start, sub, sub_len)
2299 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300}
2301
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002302PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002303"S.swapcase() -> string\n\
2304\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002306converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307
2308static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002309string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310{
2311 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002312 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002313 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314
Anthony Baxtera6286212006-04-11 07:42:36 +00002315 newobj = PyString_FromStringAndSize(NULL, n);
2316 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002318 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319 for (i = 0; i < n; i++) {
2320 int c = Py_CHARMASK(*s++);
2321 if (islower(c)) {
2322 *s_new = toupper(c);
2323 }
2324 else if (isupper(c)) {
2325 *s_new = tolower(c);
2326 }
2327 else
2328 *s_new = c;
2329 s_new++;
2330 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002331 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332}
2333
2334
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002335PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336"S.translate(table [,deletechars]) -> string\n\
2337\n\
2338Return a copy of the string S, where all characters occurring\n\
2339in the optional argument deletechars are removed, and the\n\
2340remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002341translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342
2343static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002344string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002345{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002346 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002347 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002348 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002349 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002350 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002351 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352 PyObject *result;
2353 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002354 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002356 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002357 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359
2360 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002361 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002362 tablen = PyString_GET_SIZE(tableobj);
2363 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002364 else if (tableobj == Py_None) {
2365 table = NULL;
2366 tablen = 256;
2367 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002368#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002370 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 parameter; instead a mapping to None will cause characters
2372 to be deleted. */
2373 if (delobj != NULL) {
2374 PyErr_SetString(PyExc_TypeError,
2375 "deletions are implemented differently for unicode");
2376 return NULL;
2377 }
2378 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2379 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002380#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002381 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383
Martin v. Löwis00b61272002-12-12 20:03:19 +00002384 if (tablen != 256) {
2385 PyErr_SetString(PyExc_ValueError,
2386 "translation table must be 256 characters long");
2387 return NULL;
2388 }
2389
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390 if (delobj != NULL) {
2391 if (PyString_Check(delobj)) {
2392 del_table = PyString_AS_STRING(delobj);
2393 dellen = PyString_GET_SIZE(delobj);
2394 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002395#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002396 else if (PyUnicode_Check(delobj)) {
2397 PyErr_SetString(PyExc_TypeError,
2398 "deletions are implemented differently for unicode");
2399 return NULL;
2400 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002401#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2403 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404 }
2405 else {
2406 del_table = NULL;
2407 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408 }
2409
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002410 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411 result = PyString_FromStringAndSize((char *)NULL, inlen);
2412 if (result == NULL)
2413 return NULL;
2414 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002415 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002417 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418 /* If no deletions are required, use faster code */
2419 for (i = inlen; --i >= 0; ) {
2420 c = Py_CHARMASK(*input++);
2421 if (Py_CHARMASK((*output++ = table[c])) != c)
2422 changed = 1;
2423 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002424 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425 return result;
2426 Py_DECREF(result);
2427 Py_INCREF(input_obj);
2428 return input_obj;
2429 }
2430
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002431 if (table == NULL) {
2432 for (i = 0; i < 256; i++)
2433 trans_table[i] = Py_CHARMASK(i);
2434 } else {
2435 for (i = 0; i < 256; i++)
2436 trans_table[i] = Py_CHARMASK(table[i]);
2437 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438
2439 for (i = 0; i < dellen; i++)
2440 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2441
2442 for (i = inlen; --i >= 0; ) {
2443 c = Py_CHARMASK(*input++);
2444 if (trans_table[c] != -1)
2445 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2446 continue;
2447 changed = 1;
2448 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002449 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002450 Py_DECREF(result);
2451 Py_INCREF(input_obj);
2452 return input_obj;
2453 }
2454 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002455 if (inlen > 0)
2456 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002457 return result;
2458}
2459
2460
/* Search directions passed as the `direction` argument of findstring()
   and countstring(). */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Return a char * to the first byte equal to c within the first
   target_len bytes of target, or NULL if there is none (memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2468
2469/* String ops must return a string. */
2470/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002471Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002472return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002473{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002474 if (PyString_CheckExact(self)) {
2475 Py_INCREF(self);
2476 return self;
2477 }
2478 return (PyStringObject *)PyString_FromStringAndSize(
2479 PyString_AS_STRING(self),
2480 PyString_GET_SIZE(self));
2481}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002483Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002484countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002485{
2486 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002487 const char *start=target;
2488 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002489
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002490 while ( (start=findchar(start, end-start, c)) != NULL ) {
2491 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002492 if (count >= maxcount)
2493 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002494 start += 1;
2495 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002496 return count;
2497}
2498
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002499Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002500findstring(const char *target, Py_ssize_t target_len,
2501 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002502 Py_ssize_t start,
2503 Py_ssize_t end,
2504 int direction)
2505{
2506 if (start < 0) {
2507 start += target_len;
2508 if (start < 0)
2509 start = 0;
2510 }
2511 if (end > target_len) {
2512 end = target_len;
2513 } else if (end < 0) {
2514 end += target_len;
2515 if (end < 0)
2516 end = 0;
2517 }
2518
2519 /* zero-length substrings always match at the first attempt */
2520 if (pattern_len == 0)
2521 return (direction > 0) ? start : end;
2522
2523 end -= pattern_len;
2524
2525 if (direction < 0) {
2526 for (; end >= start; end--)
2527 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2528 return end;
2529 } else {
2530 for (; start <= end; start++)
2531 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2532 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002533 }
2534 return -1;
2535}
2536
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002537Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002538countstring(const char *target, Py_ssize_t target_len,
2539 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002540 Py_ssize_t start,
2541 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002542 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002543{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002544 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002545
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002546 if (start < 0) {
2547 start += target_len;
2548 if (start < 0)
2549 start = 0;
2550 }
2551 if (end > target_len) {
2552 end = target_len;
2553 } else if (end < 0) {
2554 end += target_len;
2555 if (end < 0)
2556 end = 0;
2557 }
2558
2559 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002560 if (pattern_len == 0 || maxcount == 0) {
2561 if (target_len+1 < maxcount)
2562 return target_len+1;
2563 return maxcount;
2564 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002565
2566 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002567 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002568 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002569 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2570 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002571 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002572 end -= pattern_len-1;
2573 }
2574 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002575 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002576 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2577 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002578 if (--maxcount <= 0)
2579 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002580 start += pattern_len-1;
2581 }
2582 }
2583 return count;
2584}
2585
2586
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002587/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002588
2589/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002590Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002592 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002593 Py_ssize_t maxcount)
2594{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002595 char *self_s, *result_s;
2596 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002597 Py_ssize_t count, i, product;
2598 PyStringObject *result;
2599
2600 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002601
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002602 /* 1 at the end plus 1 after every character */
2603 count = self_len+1;
2604 if (maxcount < count)
2605 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002606
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002607 /* Check for overflow */
2608 /* result_len = count * to_len + self_len; */
2609 product = count * to_len;
2610 if (product / to_len != count) {
2611 PyErr_SetString(PyExc_OverflowError,
2612 "replace string is too long");
2613 return NULL;
2614 }
2615 result_len = product + self_len;
2616 if (result_len < 0) {
2617 PyErr_SetString(PyExc_OverflowError,
2618 "replace string is too long");
2619 return NULL;
2620 }
2621
2622 if (! (result = (PyStringObject *)
2623 PyString_FromStringAndSize(NULL, result_len)) )
2624 return NULL;
2625
2626 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002627 result_s = PyString_AS_STRING(result);
2628
2629 /* TODO: special case single character, which doesn't need memcpy */
2630
2631 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002632 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002633 result_s += to_len;
2634 count -= 1;
2635
2636 for (i=0; i<count; i++) {
2637 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002638 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002639 result_s += to_len;
2640 }
2641
2642 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002643 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002644
2645 return result;
2646}
2647
2648/* Special case for deleting a single character */
2649/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002650Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002651replace_delete_single_character(PyStringObject *self,
2652 char from_c, Py_ssize_t maxcount)
2653{
2654 char *self_s, *result_s;
2655 char *start, *next, *end;
2656 Py_ssize_t self_len, result_len;
2657 Py_ssize_t count;
2658 PyStringObject *result;
2659
2660 self_len = PyString_GET_SIZE(self);
2661 self_s = PyString_AS_STRING(self);
2662
Andrew Dalke51324072006-05-26 20:25:22 +00002663 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002664 if (count == 0) {
2665 return return_self(self);
2666 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002667
2668 result_len = self_len - count; /* from_len == 1 */
2669 assert(result_len>=0);
2670
2671 if ( (result = (PyStringObject *)
2672 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2673 return NULL;
2674 result_s = PyString_AS_STRING(result);
2675
2676 start = self_s;
2677 end = self_s + self_len;
2678 while (count-- > 0) {
2679 next = findchar(start, end-start, from_c);
2680 if (next == NULL)
2681 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002682 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002683 result_s += (next-start);
2684 start = next+1;
2685 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002686 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002687
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002688 return result;
2689}
2690
2691/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2692
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002693Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002694replace_delete_substring(PyStringObject *self,
2695 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002696 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002697 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002698 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002699 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002700 Py_ssize_t count, offset;
2701 PyStringObject *result;
2702
2703 self_len = PyString_GET_SIZE(self);
2704 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002705
2706 count = countstring(self_s, self_len,
2707 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002708 0, self_len, 1,
2709 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002710
2711 if (count == 0) {
2712 /* no matches */
2713 return return_self(self);
2714 }
2715
2716 result_len = self_len - (count * from_len);
2717 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002718
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002719 if ( (result = (PyStringObject *)
2720 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2721 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002722
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002723 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002724
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002725 start = self_s;
2726 end = self_s + self_len;
2727 while (count-- > 0) {
2728 offset = findstring(start, end-start,
2729 from_s, from_len,
2730 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002731 if (offset == -1)
2732 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002733 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002734
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002735 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002736
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002737 result_s += (next-start);
2738 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002739 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002740 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002741 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002742}
2743
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002745Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002746replace_single_character_in_place(PyStringObject *self,
2747 char from_c, char to_c,
2748 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002749{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002750 char *self_s, *result_s, *start, *end, *next;
2751 Py_ssize_t self_len;
2752 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002753
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002754 /* The result string will be the same size */
2755 self_s = PyString_AS_STRING(self);
2756 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002757
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002758 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002759
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002760 if (next == NULL) {
2761 /* No matches; return the original string */
2762 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002763 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002764
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002766 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002767 if (result == NULL)
2768 return NULL;
2769 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002770 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002771
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002772 /* change everything in-place, starting with this one */
2773 start = result_s + (next-self_s);
2774 *start = to_c;
2775 start++;
2776 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002777
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002778 while (--maxcount > 0) {
2779 next = findchar(start, end-start, from_c);
2780 if (next == NULL)
2781 break;
2782 *next = to_c;
2783 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002784 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002785
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002786 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002787}
2788
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002789/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002790Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002791replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002792 const char *from_s, Py_ssize_t from_len,
2793 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002794 Py_ssize_t maxcount)
2795{
2796 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002797 char *self_s;
2798 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002799 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002800
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002801 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002802
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002803 self_s = PyString_AS_STRING(self);
2804 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002805
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002806 offset = findstring(self_s, self_len,
2807 from_s, from_len,
2808 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002809 if (offset == -1) {
2810 /* No matches; return the original string */
2811 return return_self(self);
2812 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002813
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002815 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002816 if (result == NULL)
2817 return NULL;
2818 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002819 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002820
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002821 /* change everything in-place, starting with this one */
2822 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002823 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002824 start += from_len;
2825 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002826
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 while ( --maxcount > 0) {
2828 offset = findstring(start, end-start,
2829 from_s, from_len,
2830 0, end-start, FORWARD);
2831 if (offset==-1)
2832 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002833 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002834 start += offset+from_len;
2835 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002836
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002837 return result;
2838}
2839
2840/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002841Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002842replace_single_character(PyStringObject *self,
2843 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002844 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002845 Py_ssize_t maxcount)
2846{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002847 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002848 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002849 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002850 Py_ssize_t count, product;
2851 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002852
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002853 self_s = PyString_AS_STRING(self);
2854 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002855
Andrew Dalke51324072006-05-26 20:25:22 +00002856 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002857 if (count == 0) {
2858 /* no matches, return unchanged */
2859 return return_self(self);
2860 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002861
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002862 /* use the difference between current and new, hence the "-1" */
2863 /* result_len = self_len + count * (to_len-1) */
2864 product = count * (to_len-1);
2865 if (product / (to_len-1) != count) {
2866 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2867 return NULL;
2868 }
2869 result_len = self_len + product;
2870 if (result_len < 0) {
2871 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2872 return NULL;
2873 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002874
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002875 if ( (result = (PyStringObject *)
2876 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2877 return NULL;
2878 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002879
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002880 start = self_s;
2881 end = self_s + self_len;
2882 while (count-- > 0) {
2883 next = findchar(start, end-start, from_c);
2884 if (next == NULL)
2885 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002886
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002887 if (next == start) {
2888 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002889 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002890 result_s += to_len;
2891 start += 1;
2892 } else {
2893 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002894 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002895 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002896 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002897 result_s += to_len;
2898 start = next+1;
2899 }
2900 }
2901 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002902 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002903
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002904 return result;
2905}
2906
2907/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002908Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002909replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002910 const char *from_s, Py_ssize_t from_len,
2911 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002912 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002913 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002914 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002915 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002916 Py_ssize_t count, offset, product;
2917 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002918
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002919 self_s = PyString_AS_STRING(self);
2920 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002921
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002922 count = countstring(self_s, self_len,
2923 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002924 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002925 if (count == 0) {
2926 /* no matches, return unchanged */
2927 return return_self(self);
2928 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002929
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002930 /* Check for overflow */
2931 /* result_len = self_len + count * (to_len-from_len) */
2932 product = count * (to_len-from_len);
2933 if (product / (to_len-from_len) != count) {
2934 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2935 return NULL;
2936 }
2937 result_len = self_len + product;
2938 if (result_len < 0) {
2939 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2940 return NULL;
2941 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002942
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002943 if ( (result = (PyStringObject *)
2944 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2945 return NULL;
2946 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002947
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002948 start = self_s;
2949 end = self_s + self_len;
2950 while (count-- > 0) {
2951 offset = findstring(start, end-start,
2952 from_s, from_len,
2953 0, end-start, FORWARD);
2954 if (offset == -1)
2955 break;
2956 next = start+offset;
2957 if (next == start) {
2958 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002959 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002960 result_s += to_len;
2961 start += from_len;
2962 } else {
2963 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002964 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002965 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002966 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002967 result_s += to_len;
2968 start = next+from_len;
2969 }
2970 }
2971 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002972 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002973
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002974 return result;
2975}
2976
2977
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002978Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002979replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002980 const char *from_s, Py_ssize_t from_len,
2981 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002982 Py_ssize_t maxcount)
2983{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002984 if (maxcount < 0) {
2985 maxcount = PY_SSIZE_T_MAX;
2986 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2987 /* nothing to do; return the original string */
2988 return return_self(self);
2989 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002990
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002991 if (maxcount == 0 ||
2992 (from_len == 0 && to_len == 0)) {
2993 /* nothing to do; return the original string */
2994 return return_self(self);
2995 }
2996
2997 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002998
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002999 if (from_len == 0) {
3000 /* insert the 'to' string everywhere. */
3001 /* >>> "Python".replace("", ".") */
3002 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003003 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003004 }
3005
3006 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3007 /* point for an empty self string to generate a non-empty string */
3008 /* Special case so the remaining code always gets a non-empty string */
3009 if (PyString_GET_SIZE(self) == 0) {
3010 return return_self(self);
3011 }
3012
3013 if (to_len == 0) {
3014 /* delete all occurances of 'from' string */
3015 if (from_len == 1) {
3016 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003017 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003018 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003019 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003020 }
3021 }
3022
3023 /* Handle special case where both strings have the same length */
3024
3025 if (from_len == to_len) {
3026 if (from_len == 1) {
3027 return replace_single_character_in_place(
3028 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003029 from_s[0],
3030 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003031 maxcount);
3032 } else {
3033 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003034 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003035 }
3036 }
3037
3038 /* Otherwise use the more generic algorithms */
3039 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003040 return replace_single_character(self, from_s[0],
3041 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003042 } else {
3043 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003044 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003045 }
3046}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003047
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003048PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003049"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003050\n\
3051Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003052old replaced by new. If the optional argument count is\n\
3053given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054
3055static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003056string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003057{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003058 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003059 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003060 const char *from_s, *to_s;
3061 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003062
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003063 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003064 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003065
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003066 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003067 from_s = PyString_AS_STRING(from);
3068 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003070#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003071 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003072 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003073 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003074#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003075 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003076 return NULL;
3077
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003078 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003079 to_s = PyString_AS_STRING(to);
3080 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003081 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003082#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003083 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003084 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003085 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003086#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003087 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003088 return NULL;
3089
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003090 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003091 from_s, from_len,
3092 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003093}
3094
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003095/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003096
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003097/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003098 * against substr, using the start and end arguments. Returns
3099 * -1 on error, 0 if not found and 1 if found.
3100 */
3101Py_LOCAL(int)
3102_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3103 Py_ssize_t end, int direction)
3104{
3105 Py_ssize_t len = PyString_GET_SIZE(self);
3106 Py_ssize_t slen;
3107 const char* sub;
3108 const char* str;
3109
3110 if (PyString_Check(substr)) {
3111 sub = PyString_AS_STRING(substr);
3112 slen = PyString_GET_SIZE(substr);
3113 }
3114#ifdef Py_USING_UNICODE
3115 else if (PyUnicode_Check(substr))
3116 return PyUnicode_Tailmatch((PyObject *)self,
3117 substr, start, end, direction);
3118#endif
3119 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3120 return -1;
3121 str = PyString_AS_STRING(self);
3122
3123 string_adjust_indices(&start, &end, len);
3124
3125 if (direction < 0) {
3126 /* startswith */
3127 if (start+slen > len)
3128 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003129 } else {
3130 /* endswith */
3131 if (end-start < slen || start > len)
3132 return 0;
3133
3134 if (end-slen > start)
3135 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003136 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003137 if (end-start >= slen)
3138 return ! memcmp(str+start, sub, slen);
3139 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003140}
3141
3142
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003143PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003144"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003145\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003146Return True if S starts with the specified prefix, False otherwise.\n\
3147With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003148With optional end, stop comparing S at that position.\n\
3149prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003150
3151static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003152string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003153{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003154 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003155 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003156 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003157 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003158
Guido van Rossumc6821402000-05-08 14:08:05 +00003159 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3160 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003162 if (PyTuple_Check(subobj)) {
3163 Py_ssize_t i;
3164 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3165 result = _string_tailmatch(self,
3166 PyTuple_GET_ITEM(subobj, i),
3167 start, end, -1);
3168 if (result == -1)
3169 return NULL;
3170 else if (result) {
3171 Py_RETURN_TRUE;
3172 }
3173 }
3174 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003175 }
Georg Brandl24250812006-06-09 18:45:48 +00003176 result = _string_tailmatch(self, subobj, start, end, -1);
3177 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003178 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003179 else
Georg Brandl24250812006-06-09 18:45:48 +00003180 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003181}
3182
3183
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003184PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003185"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003186\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003187Return True if S ends with the specified suffix, False otherwise.\n\
3188With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003189With optional end, stop comparing S at that position.\n\
3190suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003191
3192static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003193string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003194{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003195 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003196 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003197 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003198 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003199
Guido van Rossumc6821402000-05-08 14:08:05 +00003200 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3201 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003202 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003203 if (PyTuple_Check(subobj)) {
3204 Py_ssize_t i;
3205 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3206 result = _string_tailmatch(self,
3207 PyTuple_GET_ITEM(subobj, i),
3208 start, end, +1);
3209 if (result == -1)
3210 return NULL;
3211 else if (result) {
3212 Py_RETURN_TRUE;
3213 }
3214 }
3215 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003216 }
Georg Brandl24250812006-06-09 18:45:48 +00003217 result = _string_tailmatch(self, subobj, start, end, +1);
3218 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003219 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003220 else
Georg Brandl24250812006-06-09 18:45:48 +00003221 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003222}
3223
3224
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003225PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003226"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003227\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003228Encodes S using the codec registered for encoding. encoding defaults\n\
3229to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003230handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003231a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3232'xmlcharrefreplace' as well as any other name registered with\n\
3233codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003234
3235static PyObject *
3236string_encode(PyStringObject *self, PyObject *args)
3237{
3238 char *encoding = NULL;
3239 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003240 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003241
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003242 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3243 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003244 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003245 if (v == NULL)
3246 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003247 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3248 PyErr_Format(PyExc_TypeError,
3249 "encoder did not return a string/unicode object "
3250 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003251 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003252 Py_DECREF(v);
3253 return NULL;
3254 }
3255 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003256
3257 onError:
3258 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003259}
3260
3261
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003262PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003263"S.decode([encoding[,errors]]) -> object\n\
3264\n\
3265Decodes S using the codec registered for encoding. encoding defaults\n\
3266to the default encoding. errors may be given to set a different error\n\
3267handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003268a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3269as well as any other name registerd with codecs.register_error that is\n\
3270able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003271
3272static PyObject *
3273string_decode(PyStringObject *self, PyObject *args)
3274{
3275 char *encoding = NULL;
3276 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003277 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003278
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003279 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3280 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003281 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003282 if (v == NULL)
3283 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003284 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3285 PyErr_Format(PyExc_TypeError,
3286 "decoder did not return a string/unicode object "
3287 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003288 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003289 Py_DECREF(v);
3290 return NULL;
3291 }
3292 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003293
3294 onError:
3295 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003296}
3297
3298
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003299PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003300"S.expandtabs([tabsize]) -> string\n\
3301\n\
3302Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003303If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003304
3305static PyObject*
3306string_expandtabs(PyStringObject *self, PyObject *args)
3307{
3308 const char *e, *p;
3309 char *q;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003310 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003311 PyObject *u;
3312 int tabsize = 8;
3313
3314 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3315 return NULL;
3316
Thomas Wouters7e474022000-07-16 12:04:32 +00003317 /* First pass: determine size of output string */
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003318 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003319 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3320 for (p = PyString_AS_STRING(self); p < e; p++)
3321 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003322 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003323 j += tabsize - (j % tabsize);
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003324 if (old_j > j) {
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003325 PyErr_SetString(PyExc_OverflowError,
3326 "new string is too long");
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003327 return NULL;
3328 }
3329 old_j = j;
3330 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003331 }
3332 else {
3333 j++;
3334 if (*p == '\n' || *p == '\r') {
3335 i += j;
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003336 old_j = j = 0;
3337 if (i < 0) {
3338 PyErr_SetString(PyExc_OverflowError,
3339 "new string is too long");
3340 return NULL;
3341 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003342 }
3343 }
3344
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003345 if ((i + j) < 0) {
3346 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3347 return NULL;
3348 }
3349
Guido van Rossum4c08d552000-03-10 22:55:18 +00003350 /* Second pass: create output string and fill it */
3351 u = PyString_FromStringAndSize(NULL, i + j);
3352 if (!u)
3353 return NULL;
3354
3355 j = 0;
3356 q = PyString_AS_STRING(u);
3357
3358 for (p = PyString_AS_STRING(self); p < e; p++)
3359 if (*p == '\t') {
3360 if (tabsize > 0) {
3361 i = tabsize - (j % tabsize);
3362 j += i;
3363 while (i--)
3364 *q++ = ' ';
3365 }
3366 }
3367 else {
3368 j++;
3369 *q++ = *p;
3370 if (*p == '\n' || *p == '\r')
3371 j = 0;
3372 }
3373
3374 return u;
3375}
3376
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003377Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003378pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003379{
3380 PyObject *u;
3381
3382 if (left < 0)
3383 left = 0;
3384 if (right < 0)
3385 right = 0;
3386
Tim Peters8fa5dd02001-09-12 02:18:30 +00003387 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003388 Py_INCREF(self);
3389 return (PyObject *)self;
3390 }
3391
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003392 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003393 left + PyString_GET_SIZE(self) + right);
3394 if (u) {
3395 if (left)
3396 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003397 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003398 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003399 PyString_GET_SIZE(self));
3400 if (right)
3401 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3402 fill, right);
3403 }
3404
3405 return u;
3406}
3407
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003408PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003409"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003410"\n"
3411"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003412"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003413
3414static PyObject *
3415string_ljust(PyStringObject *self, PyObject *args)
3416{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003417 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003418 char fillchar = ' ';
3419
Thomas Wouters4abb3662006-04-19 14:50:15 +00003420 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003421 return NULL;
3422
Tim Peters8fa5dd02001-09-12 02:18:30 +00003423 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003424 Py_INCREF(self);
3425 return (PyObject*) self;
3426 }
3427
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003428 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003429}
3430
3431
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003432PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003433"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003434"\n"
3435"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003436"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003437
3438static PyObject *
3439string_rjust(PyStringObject *self, PyObject *args)
3440{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003441 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003442 char fillchar = ' ';
3443
Thomas Wouters4abb3662006-04-19 14:50:15 +00003444 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445 return NULL;
3446
Tim Peters8fa5dd02001-09-12 02:18:30 +00003447 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003448 Py_INCREF(self);
3449 return (PyObject*) self;
3450 }
3451
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003452 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003453}
3454
3455
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003456PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003457"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003458"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003459"Return S centered in a string of length width. Padding is\n"
3460"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003461
3462static PyObject *
3463string_center(PyStringObject *self, PyObject *args)
3464{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003465 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003466 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003467 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003468
Thomas Wouters4abb3662006-04-19 14:50:15 +00003469 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003470 return NULL;
3471
Tim Peters8fa5dd02001-09-12 02:18:30 +00003472 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003473 Py_INCREF(self);
3474 return (PyObject*) self;
3475 }
3476
3477 marg = width - PyString_GET_SIZE(self);
3478 left = marg / 2 + (marg & width & 1);
3479
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003480 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003481}
3482
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003483PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003484"S.zfill(width) -> string\n"
3485"\n"
3486"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003487"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003488
3489static PyObject *
3490string_zfill(PyStringObject *self, PyObject *args)
3491{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003492 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003493 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003494 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003495 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003496
Thomas Wouters4abb3662006-04-19 14:50:15 +00003497 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003498 return NULL;
3499
3500 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003501 if (PyString_CheckExact(self)) {
3502 Py_INCREF(self);
3503 return (PyObject*) self;
3504 }
3505 else
3506 return PyString_FromStringAndSize(
3507 PyString_AS_STRING(self),
3508 PyString_GET_SIZE(self)
3509 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003510 }
3511
3512 fill = width - PyString_GET_SIZE(self);
3513
3514 s = pad(self, fill, 0, '0');
3515
3516 if (s == NULL)
3517 return NULL;
3518
3519 p = PyString_AS_STRING(s);
3520 if (p[fill] == '+' || p[fill] == '-') {
3521 /* move sign to beginning of string */
3522 p[0] = p[fill];
3523 p[fill] = '0';
3524 }
3525
3526 return (PyObject*) s;
3527}
3528
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003529PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003530"S.isspace() -> bool\n\
3531\n\
3532Return True if all characters in S are whitespace\n\
3533and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534
3535static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003536string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003537{
Fred Drakeba096332000-07-09 07:04:36 +00003538 register const unsigned char *p
3539 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003540 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003541
Guido van Rossum4c08d552000-03-10 22:55:18 +00003542 /* Shortcut for single character strings */
3543 if (PyString_GET_SIZE(self) == 1 &&
3544 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003545 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003546
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003547 /* Special case for empty strings */
3548 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003549 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003550
Guido van Rossum4c08d552000-03-10 22:55:18 +00003551 e = p + PyString_GET_SIZE(self);
3552 for (; p < e; p++) {
3553 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003554 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003555 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003556 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003557}
3558
3559
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003560PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003561"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003562\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003563Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003564and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003565
3566static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003567string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003568{
Fred Drakeba096332000-07-09 07:04:36 +00003569 register const unsigned char *p
3570 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003571 register const unsigned char *e;
3572
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003573 /* Shortcut for single character strings */
3574 if (PyString_GET_SIZE(self) == 1 &&
3575 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003576 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003577
3578 /* Special case for empty strings */
3579 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581
3582 e = p + PyString_GET_SIZE(self);
3583 for (; p < e; p++) {
3584 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003585 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003586 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003587 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003588}
3589
3590
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003591PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003592"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003593\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003594Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003595and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003596
3597static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003598string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003599{
Fred Drakeba096332000-07-09 07:04:36 +00003600 register const unsigned char *p
3601 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003602 register const unsigned char *e;
3603
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003604 /* Shortcut for single character strings */
3605 if (PyString_GET_SIZE(self) == 1 &&
3606 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003607 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003608
3609 /* Special case for empty strings */
3610 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003612
3613 e = p + PyString_GET_SIZE(self);
3614 for (; p < e; p++) {
3615 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003616 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003617 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003618 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003619}
3620
3621
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003622PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003623"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003624\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003625Return True if all characters in S are digits\n\
3626and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627
3628static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003629string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003630{
Fred Drakeba096332000-07-09 07:04:36 +00003631 register const unsigned char *p
3632 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003633 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635 /* Shortcut for single character strings */
3636 if (PyString_GET_SIZE(self) == 1 &&
3637 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003638 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003639
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003640 /* Special case for empty strings */
3641 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003642 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003643
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644 e = p + PyString_GET_SIZE(self);
3645 for (; p < e; p++) {
3646 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003647 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003648 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003649 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650}
3651
3652
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003653PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003654"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003656Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003657at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658
3659static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003660string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661{
Fred Drakeba096332000-07-09 07:04:36 +00003662 register const unsigned char *p
3663 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003664 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003665 int cased;
3666
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667 /* Shortcut for single character strings */
3668 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003669 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003671 /* Special case for empty strings */
3672 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003673 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003674
Guido van Rossum4c08d552000-03-10 22:55:18 +00003675 e = p + PyString_GET_SIZE(self);
3676 cased = 0;
3677 for (; p < e; p++) {
3678 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003679 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680 else if (!cased && islower(*p))
3681 cased = 1;
3682 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003683 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003684}
3685
3686
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003687PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003690Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003691at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692
3693static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003694string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695{
Fred Drakeba096332000-07-09 07:04:36 +00003696 register const unsigned char *p
3697 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003698 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699 int cased;
3700
Guido van Rossum4c08d552000-03-10 22:55:18 +00003701 /* Shortcut for single character strings */
3702 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003703 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003705 /* Special case for empty strings */
3706 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003707 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003708
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709 e = p + PyString_GET_SIZE(self);
3710 cased = 0;
3711 for (; p < e; p++) {
3712 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003713 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714 else if (!cased && isupper(*p))
3715 cased = 1;
3716 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003717 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003718}
3719
3720
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003721PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003722"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003723\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003724Return True if S is a titlecased string and there is at least one\n\
3725character in S, i.e. uppercase characters may only follow uncased\n\
3726characters and lowercase characters only cased ones. Return False\n\
3727otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728
3729static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003730string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731{
Fred Drakeba096332000-07-09 07:04:36 +00003732 register const unsigned char *p
3733 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003734 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735 int cased, previous_is_cased;
3736
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737 /* Shortcut for single character strings */
3738 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003739 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003741 /* Special case for empty strings */
3742 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003743 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003744
Guido van Rossum4c08d552000-03-10 22:55:18 +00003745 e = p + PyString_GET_SIZE(self);
3746 cased = 0;
3747 previous_is_cased = 0;
3748 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003749 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750
3751 if (isupper(ch)) {
3752 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003753 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003754 previous_is_cased = 1;
3755 cased = 1;
3756 }
3757 else if (islower(ch)) {
3758 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003759 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760 previous_is_cased = 1;
3761 cased = 1;
3762 }
3763 else
3764 previous_is_cased = 0;
3765 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003766 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003767}
3768
3769
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003770PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003771"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772\n\
3773Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003774Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003775is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003776
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777static PyObject*
3778string_splitlines(PyStringObject *self, PyObject *args)
3779{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003780 register Py_ssize_t i;
3781 register Py_ssize_t j;
3782 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003783 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003784 PyObject *list;
3785 PyObject *str;
3786 char *data;
3787
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003788 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789 return NULL;
3790
3791 data = PyString_AS_STRING(self);
3792 len = PyString_GET_SIZE(self);
3793
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003794 /* This does not use the preallocated list because splitlines is
3795 usually run with hundreds of newlines. The overhead of
3796 switching between PyList_SET_ITEM and append causes about a
3797 2-3% slowdown for that common case. A smarter implementation
3798 could move the if check out, so the SET_ITEMs are done first
3799 and the appends only done when the prealloc buffer is full.
3800 That's too much work for little gain.*/
3801
Guido van Rossum4c08d552000-03-10 22:55:18 +00003802 list = PyList_New(0);
3803 if (!list)
3804 goto onError;
3805
3806 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003807 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003808
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809 /* Find a line and append it */
3810 while (i < len && data[i] != '\n' && data[i] != '\r')
3811 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003812
3813 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003814 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003815 if (i < len) {
3816 if (data[i] == '\r' && i + 1 < len &&
3817 data[i+1] == '\n')
3818 i += 2;
3819 else
3820 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003821 if (keepends)
3822 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003823 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003824 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003825 j = i;
3826 }
3827 if (j < len) {
3828 SPLIT_APPEND(data, j, len);
3829 }
3830
3831 return list;
3832
3833 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003834 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003835 return NULL;
3836}
3837
3838#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003839#undef SPLIT_ADD
3840#undef MAX_PREALLOC
3841#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003842
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003843static PyObject *
3844string_getnewargs(PyStringObject *v)
3845{
Martin v. Löwis68192102007-07-21 06:55:02 +00003846 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003847}
3848
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003849
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003850static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003851string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003852 /* Counterparts of the obsolete stropmodule functions; except
3853 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003854 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3855 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003856 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003857 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3858 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003859 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3860 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3861 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3862 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3863 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3864 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3865 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003866 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3867 capitalize__doc__},
3868 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3869 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3870 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003871 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003872 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3873 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3874 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3875 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3876 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3877 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3878 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003879 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3880 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003881 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3882 startswith__doc__},
3883 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3884 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3885 swapcase__doc__},
3886 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3887 translate__doc__},
3888 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3889 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3890 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3891 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3892 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3893 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3894 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3895 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3896 expandtabs__doc__},
3897 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3898 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003899 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003900 {NULL, NULL} /* sentinel */
3901};
3902
Jeremy Hylton938ace62002-07-17 16:30:39 +00003903static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003904str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3905
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003906static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003907string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003908{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003909 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003910 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003911
Guido van Rossumae960af2001-08-30 03:11:59 +00003912 if (type != &PyString_Type)
3913 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003914 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3915 return NULL;
3916 if (x == NULL)
3917 return PyString_FromString("");
3918 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003919}
3920
Guido van Rossumae960af2001-08-30 03:11:59 +00003921static PyObject *
3922str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3923{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003924 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003925 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003926
3927 assert(PyType_IsSubtype(type, &PyString_Type));
3928 tmp = string_new(&PyString_Type, args, kwds);
3929 if (tmp == NULL)
3930 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003931 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003932 n = PyString_GET_SIZE(tmp);
3933 pnew = type->tp_alloc(type, n);
3934 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003935 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003936 ((PyStringObject *)pnew)->ob_shash =
3937 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003938 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003939 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003940 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003941 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003942}
3943
Guido van Rossumcacfc072002-05-24 19:01:59 +00003944static PyObject *
3945basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3946{
3947 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003948 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003949 return NULL;
3950}
3951
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003952static PyObject *
3953string_mod(PyObject *v, PyObject *w)
3954{
3955 if (!PyString_Check(v)) {
3956 Py_INCREF(Py_NotImplemented);
3957 return Py_NotImplemented;
3958 }
3959 return PyString_Format(v, w);
3960}
3961
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003962PyDoc_STRVAR(basestring_doc,
3963"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003964
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003965static PyNumberMethods string_as_number = {
3966 0, /*nb_add*/
3967 0, /*nb_subtract*/
3968 0, /*nb_multiply*/
3969 0, /*nb_divide*/
3970 string_mod, /*nb_remainder*/
3971};
3972
3973
Guido van Rossumcacfc072002-05-24 19:01:59 +00003974PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00003975 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003976 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003977 0,
3978 0,
3979 0, /* tp_dealloc */
3980 0, /* tp_print */
3981 0, /* tp_getattr */
3982 0, /* tp_setattr */
3983 0, /* tp_compare */
3984 0, /* tp_repr */
3985 0, /* tp_as_number */
3986 0, /* tp_as_sequence */
3987 0, /* tp_as_mapping */
3988 0, /* tp_hash */
3989 0, /* tp_call */
3990 0, /* tp_str */
3991 0, /* tp_getattro */
3992 0, /* tp_setattro */
3993 0, /* tp_as_buffer */
3994 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3995 basestring_doc, /* tp_doc */
3996 0, /* tp_traverse */
3997 0, /* tp_clear */
3998 0, /* tp_richcompare */
3999 0, /* tp_weaklistoffset */
4000 0, /* tp_iter */
4001 0, /* tp_iternext */
4002 0, /* tp_methods */
4003 0, /* tp_members */
4004 0, /* tp_getset */
4005 &PyBaseObject_Type, /* tp_base */
4006 0, /* tp_dict */
4007 0, /* tp_descr_get */
4008 0, /* tp_descr_set */
4009 0, /* tp_dictoffset */
4010 0, /* tp_init */
4011 0, /* tp_alloc */
4012 basestring_new, /* tp_new */
4013 0, /* tp_free */
4014};
4015
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004016PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004017"str(object) -> string\n\
4018\n\
4019Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004020If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004021
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004022PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004023 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004024 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004025 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004026 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004027 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004028 (printfunc)string_print, /* tp_print */
4029 0, /* tp_getattr */
4030 0, /* tp_setattr */
4031 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004032 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004033 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004034 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004035 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004036 (hashfunc)string_hash, /* tp_hash */
4037 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004038 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004039 PyObject_GenericGetAttr, /* tp_getattro */
4040 0, /* tp_setattro */
4041 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004042 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neal Norwitzee3a1b52007-02-25 19:44:48 +00004043 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004044 string_doc, /* tp_doc */
4045 0, /* tp_traverse */
4046 0, /* tp_clear */
4047 (richcmpfunc)string_richcompare, /* tp_richcompare */
4048 0, /* tp_weaklistoffset */
4049 0, /* tp_iter */
4050 0, /* tp_iternext */
4051 string_methods, /* tp_methods */
4052 0, /* tp_members */
4053 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004054 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004055 0, /* tp_dict */
4056 0, /* tp_descr_get */
4057 0, /* tp_descr_set */
4058 0, /* tp_dictoffset */
4059 0, /* tp_init */
4060 0, /* tp_alloc */
4061 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004062 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004063};
4064
4065void
Fred Drakeba096332000-07-09 07:04:36 +00004066PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004067{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004068 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004069 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004070 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004071 if (w == NULL || !PyString_Check(*pv)) {
4072 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004073 *pv = NULL;
4074 return;
4075 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004076 v = string_concat((PyStringObject *) *pv, w);
4077 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004078 *pv = v;
4079}
4080
Guido van Rossum013142a1994-08-30 08:19:36 +00004081void
Fred Drakeba096332000-07-09 07:04:36 +00004082PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004083{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004084 PyString_Concat(pv, w);
4085 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004086}
4087
4088
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004089/* The following function breaks the notion that strings are immutable:
4090 it changes the size of a string. We get away with this only if there
4091 is only one module referencing the object. You can also think of it
4092 as creating a new string object and destroying the old one, only
4093 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004094 already be known to some other part of the code...
4095 Note that if there's not enough memory to resize the string, the original
4096 string object at *pv is deallocated, *pv is set to NULL, an "out of
4097 memory" exception is set, and -1 is returned. Else (on success) 0 is
4098 returned, and the value in *pv may or may not be the same as on input.
4099 As always, an extra byte is allocated for a trailing \0 byte (newsize
4100 does *not* include that), and a trailing \0 byte is stored.
4101*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004102
4103int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004104_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004105{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004106 register PyObject *v;
4107 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004108 v = *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004109 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004110 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004111 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004112 Py_DECREF(v);
4113 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004114 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004115 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004116 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004117 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004118 _Py_ForgetReference(v);
4119 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004120 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004121 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004122 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004123 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004124 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004125 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004126 _Py_NewReference(*pv);
4127 sv = (PyStringObject *) *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004128 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004129 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004130 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004131 return 0;
4132}
Guido van Rossume5372401993-03-16 12:15:04 +00004133
4134/* Helpers for formatstring */
4135
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004136Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004137getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004138{
Thomas Wouters977485d2006-02-16 15:59:12 +00004139 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004140 if (argidx < arglen) {
4141 (*p_argidx)++;
4142 if (arglen < 0)
4143 return args;
4144 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004145 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004146 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004147 PyErr_SetString(PyExc_TypeError,
4148 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004149 return NULL;
4150}
4151
/* Flag bits for a format specifier.  Each bit corresponds to the flag
 * character shown next to it.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4164
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004165Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004166formatfloat(char *buf, size_t buflen, int flags,
4167 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004168{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004169 /* fmt = '%#.' + `prec` + `type`
4170 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004171 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004172 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004173 x = PyFloat_AsDouble(v);
4174 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004175 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis68192102007-07-21 06:55:02 +00004176 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004177 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004178 }
Guido van Rossume5372401993-03-16 12:15:04 +00004179 if (prec < 0)
4180 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004181 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4182 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004183 /* Worst case length calc to ensure no buffer overrun:
4184
4185 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004186 fmt = %#.<prec>g
4187 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004188 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004189 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004190
4191 'f' formats:
4192 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4193 len = 1 + 50 + 1 + prec = 52 + prec
4194
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004195 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004196 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004197
4198 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004199 if (((type == 'g' || type == 'G') &&
4200 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004201 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004202 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004203 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004204 return -1;
4205 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004206 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4207 (flags&F_ALT) ? "#" : "",
4208 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004209 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004210 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004211}
4212
Tim Peters38fd5b62000-09-21 05:43:11 +00004213/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4214 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4215 * Python's regular ints.
4216 * Return value: a new PyString*, or NULL if error.
4217 * . *pbuf is set to point into it,
4218 * *plen set to the # of chars following that.
4219 * Caller must decref it when done using pbuf.
4220 * The string starting at *pbuf is of the form
4221 * "-"? ("0x" | "0X")? digit+
4222 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004223 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004224 * There will be at least prec digits, zero-filled on the left if
4225 * necessary to get that many.
4226 * val object to be converted
4227 * flags bitmask of format flags; only F_ALT is looked at
4228 * prec minimum number of digits; 0-fill on left if needed
4229 * type a character in [duoxX]; u acts the same as d
4230 *
4231 * CAUTION: o, x and X conversions on regular ints can never
4232 * produce a '-' sign, but can for Python's unbounded ints.
4233 */
4234PyObject*
4235_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4236 char **pbuf, int *plen)
4237{
4238 PyObject *result = NULL;
4239 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004240 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004241 int sign; /* 1 if '-', else 0 */
4242 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004243 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004244 int numdigits; /* len == numnondigits + numdigits */
4245 int numnondigits = 0;
4246
4247 switch (type) {
4248 case 'd':
4249 case 'u':
Martin v. Löwis68192102007-07-21 06:55:02 +00004250 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004251 break;
4252 case 'o':
Martin v. Löwis68192102007-07-21 06:55:02 +00004253 result = Py_Type(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004254 break;
4255 case 'x':
4256 case 'X':
4257 numnondigits = 2;
Martin v. Löwis68192102007-07-21 06:55:02 +00004258 result = Py_Type(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004259 break;
4260 default:
4261 assert(!"'type' not in [duoxX]");
4262 }
4263 if (!result)
4264 return NULL;
4265
Neal Norwitz56423e52006-08-13 18:11:08 +00004266 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004267 if (!buf) {
4268 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004269 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004270 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004271
Tim Peters38fd5b62000-09-21 05:43:11 +00004272 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis68192102007-07-21 06:55:02 +00004273 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004274 PyErr_BadInternalCall();
4275 return NULL;
4276 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004277 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004278 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004279 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4280 return NULL;
4281 }
4282 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004283 if (buf[len-1] == 'L') {
4284 --len;
4285 buf[len] = '\0';
4286 }
4287 sign = buf[0] == '-';
4288 numnondigits += sign;
4289 numdigits = len - numnondigits;
4290 assert(numdigits > 0);
4291
Tim Petersfff53252001-04-12 18:38:48 +00004292 /* Get rid of base marker unless F_ALT */
4293 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004294 /* Need to skip 0x, 0X or 0. */
4295 int skipped = 0;
4296 switch (type) {
4297 case 'o':
4298 assert(buf[sign] == '0');
4299 /* If 0 is only digit, leave it alone. */
4300 if (numdigits > 1) {
4301 skipped = 1;
4302 --numdigits;
4303 }
4304 break;
4305 case 'x':
4306 case 'X':
4307 assert(buf[sign] == '0');
4308 assert(buf[sign + 1] == 'x');
4309 skipped = 2;
4310 numnondigits -= 2;
4311 break;
4312 }
4313 if (skipped) {
4314 buf += skipped;
4315 len -= skipped;
4316 if (sign)
4317 buf[0] = '-';
4318 }
4319 assert(len == numnondigits + numdigits);
4320 assert(numdigits > 0);
4321 }
4322
4323 /* Fill with leading zeroes to meet minimum width. */
4324 if (prec > numdigits) {
4325 PyObject *r1 = PyString_FromStringAndSize(NULL,
4326 numnondigits + prec);
4327 char *b1;
4328 if (!r1) {
4329 Py_DECREF(result);
4330 return NULL;
4331 }
4332 b1 = PyString_AS_STRING(r1);
4333 for (i = 0; i < numnondigits; ++i)
4334 *b1++ = *buf++;
4335 for (i = 0; i < prec - numdigits; i++)
4336 *b1++ = '0';
4337 for (i = 0; i < numdigits; i++)
4338 *b1++ = *buf++;
4339 *b1 = '\0';
4340 Py_DECREF(result);
4341 result = r1;
4342 buf = PyString_AS_STRING(result);
4343 len = numnondigits + prec;
4344 }
4345
4346 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004347 if (type == 'X') {
4348 /* Need to convert all lower case letters to upper case.
4349 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004350 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004351 if (buf[i] >= 'a' && buf[i] <= 'x')
4352 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004353 }
4354 *pbuf = buf;
4355 *plen = len;
4356 return result;
4357}
4358
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004359Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004360formatint(char *buf, size_t buflen, int flags,
4361 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004362{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004363 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004364 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4365 + 1 + 1 = 24 */
4366 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004367 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004368 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004369
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004370 x = PyInt_AsLong(v);
4371 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004372 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00004373 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004374 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004375 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004376 if (x < 0 && type == 'u') {
4377 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004378 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004379 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4380 sign = "-";
4381 else
4382 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004383 if (prec < 0)
4384 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004385
4386 if ((flags & F_ALT) &&
4387 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004388 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004389 * of issues that cause pain:
4390 * - when 0 is being converted, the C standard leaves off
4391 * the '0x' or '0X', which is inconsistent with other
4392 * %#x/%#X conversions and inconsistent with Python's
4393 * hex() function
4394 * - there are platforms that violate the standard and
4395 * convert 0 with the '0x' or '0X'
4396 * (Metrowerks, Compaq Tru64)
4397 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004398 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004399 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004400 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004401 * We can achieve the desired consistency by inserting our
4402 * own '0x' or '0X' prefix, and substituting %x/%X in place
4403 * of %#x/%#X.
4404 *
4405 * Note that this is the same approach as used in
4406 * formatint() in unicodeobject.c
4407 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004408 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4409 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004410 }
4411 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004412 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4413 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004414 prec, type);
4415 }
4416
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004417 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4418 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004419 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004420 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004421 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004422 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004423 return -1;
4424 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004425 if (sign[0])
4426 PyOS_snprintf(buf, buflen, fmt, -x);
4427 else
4428 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004429 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004430}
4431
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004432Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004433formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004434{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004435 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004436 if (PyString_Check(v)) {
4437 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004438 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004439 }
4440 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004441 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004442 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004443 }
4444 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004445 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004446}
4447
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every formatting
   routine performs its own bounds check so the buffer cannot overflow; a
   better solution may be to malloc a buffer of the appropriate size for
   each format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004457
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004458PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004459PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004460{
4461 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004462 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004463 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004464 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004465 PyObject *result, *orig_args;
4466#ifdef Py_USING_UNICODE
4467 PyObject *v, *w;
4468#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004469 PyObject *dict = NULL;
4470 if (format == NULL || !PyString_Check(format) || args == NULL) {
4471 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004472 return NULL;
4473 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004474 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004475 fmt = PyString_AS_STRING(format);
4476 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004477 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004478 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004479 if (result == NULL)
4480 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004481 res = PyString_AsString(result);
4482 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004483 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004484 argidx = 0;
4485 }
4486 else {
4487 arglen = -1;
4488 argidx = -2;
4489 }
Martin v. Löwis68192102007-07-21 06:55:02 +00004490 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004491 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004492 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004493 while (--fmtcnt >= 0) {
4494 if (*fmt != '%') {
4495 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004496 rescnt = fmtcnt + 100;
4497 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004498 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004499 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004500 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004501 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004502 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004503 }
4504 *res++ = *fmt++;
4505 }
4506 else {
4507 /* Got a format specifier */
4508 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004509 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004510 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004511 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004512 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004513 PyObject *v = NULL;
4514 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004515 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004516 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004517 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004518 char formatbuf[FORMATBUFLEN];
4519 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004520#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004521 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004522 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004523#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004524
Guido van Rossumda9c2711996-12-05 21:58:58 +00004525 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004526 if (*fmt == '(') {
4527 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004528 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004529 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004530 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004531
4532 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004533 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004534 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004535 goto error;
4536 }
4537 ++fmt;
4538 --fmtcnt;
4539 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004540 /* Skip over balanced parentheses */
4541 while (pcount > 0 && --fmtcnt >= 0) {
4542 if (*fmt == ')')
4543 --pcount;
4544 else if (*fmt == '(')
4545 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004546 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004547 }
4548 keylen = fmt - keystart - 1;
4549 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004550 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004551 "incomplete format key");
4552 goto error;
4553 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004554 key = PyString_FromStringAndSize(keystart,
4555 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004556 if (key == NULL)
4557 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004558 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004559 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004560 args_owned = 0;
4561 }
4562 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004563 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004564 if (args == NULL) {
4565 goto error;
4566 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004567 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004568 arglen = -1;
4569 argidx = -2;
4570 }
Guido van Rossume5372401993-03-16 12:15:04 +00004571 while (--fmtcnt >= 0) {
4572 switch (c = *fmt++) {
4573 case '-': flags |= F_LJUST; continue;
4574 case '+': flags |= F_SIGN; continue;
4575 case ' ': flags |= F_BLANK; continue;
4576 case '#': flags |= F_ALT; continue;
4577 case '0': flags |= F_ZERO; continue;
4578 }
4579 break;
4580 }
4581 if (c == '*') {
4582 v = getnextarg(args, arglen, &argidx);
4583 if (v == NULL)
4584 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004585 if (!PyInt_Check(v)) {
4586 PyErr_SetString(PyExc_TypeError,
4587 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004588 goto error;
4589 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004590 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004591 if (width < 0) {
4592 flags |= F_LJUST;
4593 width = -width;
4594 }
Guido van Rossume5372401993-03-16 12:15:04 +00004595 if (--fmtcnt >= 0)
4596 c = *fmt++;
4597 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004598 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004599 width = c - '0';
4600 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004601 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004602 if (!isdigit(c))
4603 break;
4604 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004605 PyErr_SetString(
4606 PyExc_ValueError,
4607 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004608 goto error;
4609 }
4610 width = width*10 + (c - '0');
4611 }
4612 }
4613 if (c == '.') {
4614 prec = 0;
4615 if (--fmtcnt >= 0)
4616 c = *fmt++;
4617 if (c == '*') {
4618 v = getnextarg(args, arglen, &argidx);
4619 if (v == NULL)
4620 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004621 if (!PyInt_Check(v)) {
4622 PyErr_SetString(
4623 PyExc_TypeError,
4624 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004625 goto error;
4626 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004627 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004628 if (prec < 0)
4629 prec = 0;
4630 if (--fmtcnt >= 0)
4631 c = *fmt++;
4632 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004633 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004634 prec = c - '0';
4635 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004636 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004637 if (!isdigit(c))
4638 break;
4639 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004640 PyErr_SetString(
4641 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004642 "prec too big");
4643 goto error;
4644 }
4645 prec = prec*10 + (c - '0');
4646 }
4647 }
4648 } /* prec */
4649 if (fmtcnt >= 0) {
4650 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004651 if (--fmtcnt >= 0)
4652 c = *fmt++;
4653 }
4654 }
4655 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004656 PyErr_SetString(PyExc_ValueError,
4657 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004658 goto error;
4659 }
4660 if (c != '%') {
4661 v = getnextarg(args, arglen, &argidx);
4662 if (v == NULL)
4663 goto error;
4664 }
4665 sign = 0;
4666 fill = ' ';
4667 switch (c) {
4668 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004669 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004670 len = 1;
4671 break;
4672 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004673#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004674 if (PyUnicode_Check(v)) {
4675 fmt = fmt_start;
4676 argidx = argidx_start;
4677 goto unicode;
4678 }
Georg Brandld45014b2005-10-01 17:06:00 +00004679#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004680 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004681#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004682 if (temp != NULL && PyUnicode_Check(temp)) {
4683 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004684 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004685 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004686 goto unicode;
4687 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004688#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004689 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004690 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004691 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004692 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004693 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004694 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004695 if (!PyString_Check(temp)) {
4696 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004697 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004698 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004699 goto error;
4700 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004701 pbuf = PyString_AS_STRING(temp);
4702 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004703 if (prec >= 0 && len > prec)
4704 len = prec;
4705 break;
4706 case 'i':
4707 case 'd':
4708 case 'u':
4709 case 'o':
4710 case 'x':
4711 case 'X':
4712 if (c == 'i')
4713 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004714 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004715 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004716 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004717 prec, c, &pbuf, &ilen);
4718 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004719 if (!temp)
4720 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004721 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004722 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004723 else {
4724 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004725 len = formatint(pbuf,
4726 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004727 flags, prec, c, v);
4728 if (len < 0)
4729 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004730 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004731 }
4732 if (flags & F_ZERO)
4733 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004734 break;
4735 case 'e':
4736 case 'E':
4737 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004738 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004739 case 'g':
4740 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004741 if (c == 'F')
4742 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004743 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004744 len = formatfloat(pbuf, sizeof(formatbuf),
4745 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004746 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004747 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004748 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004749 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004750 fill = '0';
4751 break;
4752 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004753#ifdef Py_USING_UNICODE
4754 if (PyUnicode_Check(v)) {
4755 fmt = fmt_start;
4756 argidx = argidx_start;
4757 goto unicode;
4758 }
4759#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004760 pbuf = formatbuf;
4761 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004762 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004763 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004764 break;
4765 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004766 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004767 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004768 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004769 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004770 (Py_ssize_t)(fmt - 1 -
4771 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004772 goto error;
4773 }
4774 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004775 if (*pbuf == '-' || *pbuf == '+') {
4776 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004777 len--;
4778 }
4779 else if (flags & F_SIGN)
4780 sign = '+';
4781 else if (flags & F_BLANK)
4782 sign = ' ';
4783 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004784 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004785 }
4786 if (width < len)
4787 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004788 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004789 reslen -= rescnt;
4790 rescnt = width + fmtcnt + 100;
4791 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004792 if (reslen < 0) {
4793 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004794 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004795 return PyErr_NoMemory();
4796 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004797 if (_PyString_Resize(&result, reslen) < 0) {
4798 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004799 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004800 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004801 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004802 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004803 }
4804 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004805 if (fill != ' ')
4806 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004807 rescnt--;
4808 if (width > len)
4809 width--;
4810 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004811 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4812 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004813 assert(pbuf[1] == c);
4814 if (fill != ' ') {
4815 *res++ = *pbuf++;
4816 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004817 }
Tim Petersfff53252001-04-12 18:38:48 +00004818 rescnt -= 2;
4819 width -= 2;
4820 if (width < 0)
4821 width = 0;
4822 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004823 }
4824 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004825 do {
4826 --rescnt;
4827 *res++ = fill;
4828 } while (--width > len);
4829 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004830 if (fill == ' ') {
4831 if (sign)
4832 *res++ = sign;
4833 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004834 (c == 'x' || c == 'X')) {
4835 assert(pbuf[0] == '0');
4836 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004837 *res++ = *pbuf++;
4838 *res++ = *pbuf++;
4839 }
4840 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004841 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004842 res += len;
4843 rescnt -= len;
4844 while (--width >= len) {
4845 --rescnt;
4846 *res++ = ' ';
4847 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004848 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004849 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004850 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004851 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004852 goto error;
4853 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004854 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004855 } /* '%' */
4856 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004857 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004858 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004859 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004860 goto error;
4861 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004862 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004863 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004864 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004865 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004866 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004867
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004868#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004869 unicode:
4870 if (args_owned) {
4871 Py_DECREF(args);
4872 args_owned = 0;
4873 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004874 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004875 if (PyTuple_Check(orig_args) && argidx > 0) {
4876 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004877 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004878 v = PyTuple_New(n);
4879 if (v == NULL)
4880 goto error;
4881 while (--n >= 0) {
4882 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4883 Py_INCREF(w);
4884 PyTuple_SET_ITEM(v, n, w);
4885 }
4886 args = v;
4887 } else {
4888 Py_INCREF(orig_args);
4889 args = orig_args;
4890 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004891 args_owned = 1;
4892 /* Take what we have of the result and let the Unicode formatting
4893 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004894 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004895 if (_PyString_Resize(&result, rescnt))
4896 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004897 fmtcnt = PyString_GET_SIZE(format) - \
4898 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004899 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4900 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004901 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004902 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004903 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004904 if (v == NULL)
4905 goto error;
4906 /* Paste what we have (result) to what the Unicode formatting
4907 function returned (v) and return the result (or error) */
4908 w = PyUnicode_Concat(result, v);
4909 Py_DECREF(result);
4910 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004911 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004912 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004913#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004914
Guido van Rossume5372401993-03-16 12:15:04 +00004915 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004916 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004917 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004918 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004919 }
Guido van Rossume5372401993-03-16 12:15:04 +00004920 return NULL;
4921}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004922
Guido van Rossum2a61e741997-01-18 07:55:05 +00004923void
Fred Drakeba096332000-07-09 07:04:36 +00004924PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004925{
4926 register PyStringObject *s = (PyStringObject *)(*p);
4927 PyObject *t;
4928 if (s == NULL || !PyString_Check(s))
4929 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004930 /* If it's a string subclass, we don't really know what putting
4931 it in the interned dict might do. */
4932 if (!PyString_CheckExact(s))
4933 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004934 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004935 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004936 if (interned == NULL) {
4937 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004938 if (interned == NULL) {
4939 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004940 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004941 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004942 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004943 t = PyDict_GetItem(interned, (PyObject *)s);
4944 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004945 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004946 Py_DECREF(*p);
4947 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004948 return;
4949 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004950
Armin Rigo79f7ad22004-08-07 19:27:39 +00004951 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004952 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004953 return;
4954 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004955 /* The two references in interned are not counted by refcnt.
4956 The string deallocator will take care of this */
Martin v. Löwis68192102007-07-21 06:55:02 +00004957 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004958 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004959}
4960
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004961void
4962PyString_InternImmortal(PyObject **p)
4963{
4964 PyString_InternInPlace(p);
4965 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4966 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4967 Py_INCREF(*p);
4968 }
4969}
4970
Guido van Rossum2a61e741997-01-18 07:55:05 +00004971
4972PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004973PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004974{
4975 PyObject *s = PyString_FromString(cp);
4976 if (s == NULL)
4977 return NULL;
4978 PyString_InternInPlace(&s);
4979 return s;
4980}
4981
Guido van Rossum8cf04761997-08-02 02:57:45 +00004982void
Fred Drakeba096332000-07-09 07:04:36 +00004983PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004984{
4985 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004986 for (i = 0; i < UCHAR_MAX + 1; i++) {
4987 Py_XDECREF(characters[i]);
4988 characters[i] = NULL;
4989 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004990 Py_XDECREF(nullstring);
4991 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004992}
Barry Warsawa903ad982001-02-23 16:40:48 +00004993
Barry Warsawa903ad982001-02-23 16:40:48 +00004994void _Py_ReleaseInternedStrings(void)
4995{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004996 PyObject *keys;
4997 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004998 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00004999 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005000
5001 if (interned == NULL || !PyDict_Check(interned))
5002 return;
5003 keys = PyDict_Keys(interned);
5004 if (keys == NULL || !PyList_Check(keys)) {
5005 PyErr_Clear();
5006 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005007 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005008
5009 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5010 detector, interned strings are not forcibly deallocated; rather, we
5011 give them their stolen references back, and then clear and DECREF
5012 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005013
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005014 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005015 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5016 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005017 for (i = 0; i < n; i++) {
5018 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5019 switch (s->ob_sstate) {
5020 case SSTATE_NOT_INTERNED:
5021 /* XXX Shouldn't happen */
5022 break;
5023 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005024 Py_Refcnt(s) += 1;
5025 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005026 break;
5027 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005028 Py_Refcnt(s) += 2;
5029 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005030 break;
5031 default:
5032 Py_FatalError("Inconsistent interned string state.");
5033 }
5034 s->ob_sstate = SSTATE_NOT_INTERNED;
5035 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005036 fprintf(stderr, "total size of all interned strings: "
5037 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5038 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005039 Py_DECREF(keys);
5040 PyDict_Clear(interned);
5041 Py_DECREF(interned);
5042 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005043}