blob: 3c140229812d68ca5ad241e04f38bdef8a720588 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000424 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000504 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
Martin v. Löwis68192102007-07-21 06:55:02 +0000524 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000536 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000619 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000620 c = (c<<3) + *s++ - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000621 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000627 if (s+1 < end &&
628 isxdigit(Py_CHARMASK(s[0])) &&
629 isxdigit(Py_CHARMASK(s[1])))
630 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 unsigned int x = 0;
632 c = Py_CHARMASK(*s);
633 s++;
634 if (isdigit(c))
635 x = c - '0';
636 else if (islower(c))
637 x = 10 + c - 'a';
638 else
639 x = 10 + c - 'A';
640 x = x << 4;
641 c = Py_CHARMASK(*s);
642 s++;
643 if (isdigit(c))
644 x += c - '0';
645 else if (islower(c))
646 x += 10 + c - 'a';
647 else
648 x += 10 + c - 'A';
649 *p++ = x;
650 break;
651 }
652 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000653 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657 if (strcmp(errors, "replace") == 0) {
658 *p++ = '?';
659 } else if (strcmp(errors, "ignore") == 0)
660 /* do nothing */;
661 else {
662 PyErr_Format(PyExc_ValueError,
663 "decoding error; "
664 "unknown error handling code: %.400s",
665 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000666 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 }
668#ifndef Py_USING_UNICODE
669 case 'u':
670 case 'U':
671 case 'N':
672 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000673 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000674 "Unicode escapes not legal "
675 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#endif
679 default:
680 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000681 s--;
682 goto non_esc; /* an arbitry number of unescaped
683 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 }
685 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000686 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000687 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000688 return v;
689 failed:
690 Py_DECREF(v);
691 return NULL;
692}
693
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000694/* -------------------------------------------------------------------- */
695/* object api */
696
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698string_getsize(register PyObject *op)
699{
700 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000701 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702 if (PyString_AsStringAndSize(op, &s, &len))
703 return -1;
704 return len;
705}
706
707static /*const*/ char *
708string_getbuffer(register PyObject *op)
709{
710 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000711 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712 if (PyString_AsStringAndSize(op, &s, &len))
713 return NULL;
714 return s;
715}
716
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000718PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000720 if (!PyString_Check(op))
721 return string_getsize(op);
Martin v. Löwis68192102007-07-21 06:55:02 +0000722 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723}
724
725/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000726PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728 if (!PyString_Check(op))
729 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731}
732
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733int
734PyString_AsStringAndSize(register PyObject *obj,
735 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000736 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737{
738 if (s == NULL) {
739 PyErr_BadInternalCall();
740 return -1;
741 }
742
743 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000744#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000745 if (PyUnicode_Check(obj)) {
746 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
747 if (obj == NULL)
748 return -1;
749 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000750 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#endif
752 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000753 PyErr_Format(PyExc_TypeError,
754 "expected string or Unicode object, "
Martin v. Löwis68192102007-07-21 06:55:02 +0000755 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000756 return -1;
757 }
758 }
759
760 *s = PyString_AS_STRING(obj);
761 if (len != NULL)
762 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000763 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000764 PyErr_SetString(PyExc_TypeError,
765 "expected string without null bytes");
766 return -1;
767 }
768 return 0;
769}
770
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000772/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
Fredrik Lundha50d2012006-05-26 17:04:58 +0000774#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000775
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000776#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000777#define STRINGLIB_LEN PyString_GET_SIZE
778#define STRINGLIB_NEW PyString_FromStringAndSize
779#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000780
Fredrik Lundhb9479482006-05-26 17:22:38 +0000781#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
Fredrik Lundha50d2012006-05-26 17:04:58 +0000783#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000785#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000786#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000787#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000788
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790static int
Fred Drakeba096332000-07-09 07:04:36 +0000791string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792{
Brett Cannon01531592007-09-17 03:28:34 +0000793 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000796
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000797 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000798 if (! PyString_CheckExact(op)) {
799 int ret;
800 /* A str subclass may have its own __str__ method. */
801 op = (PyStringObject *) PyObject_Str((PyObject *)op);
802 if (op == NULL)
803 return -1;
804 ret = string_print(op, fp, flags);
805 Py_DECREF(op);
806 return ret;
807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000808 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000809 char *data = op->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +0000810 Py_ssize_t size = Py_Size(op);
Brett Cannon01531592007-09-17 03:28:34 +0000811 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000812 while (size > INT_MAX) {
813 /* Very long strings cannot be written atomically.
814 * But don't write exactly INT_MAX bytes at a time
815 * to avoid memory aligment issues.
816 */
817 const int chunk_size = INT_MAX & ~0x3FFF;
818 fwrite(data, 1, chunk_size, fp);
819 data += chunk_size;
820 size -= chunk_size;
821 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000822#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000823 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000824#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000825 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000826#endif
Brett Cannon01531592007-09-17 03:28:34 +0000827 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000828 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830
Thomas Wouters7e474022000-07-16 12:04:32 +0000831 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 quote = '\'';
Martin v. Löwis68192102007-07-21 06:55:02 +0000833 if (memchr(op->ob_sval, '\'', Py_Size(op)) &&
834 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000835 quote = '"';
836
Brett Cannon01531592007-09-17 03:28:34 +0000837 str_len = Py_Size(op);
838 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000840 for (i = 0; i < str_len; i++) {
841 /* Since strings are immutable and the caller should have a
842 reference, accessing the interal buffer should not be an issue
843 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000845 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000851 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000852 fprintf(fp, "\\r");
853 else if (c < ' ' || c >= 0x7f)
854 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000855 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000856 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000858 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000859 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000860 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000863PyObject *
864PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000866 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis68192102007-07-21 06:55:02 +0000867 size_t newsize = 2 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000868 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +0000869 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000870 PyErr_SetString(PyExc_OverflowError,
871 "string is too large to make repr");
872 }
873 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000875 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000876 }
877 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000878 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879 register char c;
880 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 int quote;
882
Thomas Wouters7e474022000-07-16 12:04:32 +0000883 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000884 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000885 if (smartquotes &&
Martin v. Löwis68192102007-07-21 06:55:02 +0000886 memchr(op->ob_sval, '\'', Py_Size(op)) &&
887 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000888 quote = '"';
889
Tim Peters9161c8b2001-12-03 01:55:38 +0000890 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000891 *p++ = quote;
Martin v. Löwis68192102007-07-21 06:55:02 +0000892 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000893 /* There's at least enough room for a hex escape
894 and a closing quote. */
895 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000897 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000898 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000899 else if (c == '\t')
900 *p++ = '\\', *p++ = 't';
901 else if (c == '\n')
902 *p++ = '\\', *p++ = 'n';
903 else if (c == '\r')
904 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000905 else if (c < ' ' || c >= 0x7f) {
906 /* For performance, we don't want to call
907 PyOS_snprintf here (extra layers of
908 function call). */
909 sprintf(p, "\\x%02x", c & 0xff);
910 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000911 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000912 else
913 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000914 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000915 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000916 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000918 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000919 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000920 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000921 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922}
923
Guido van Rossum189f1df2001-05-01 16:51:53 +0000924static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000925string_repr(PyObject *op)
926{
927 return PyString_Repr(op, 1);
928}
929
930static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000931string_str(PyObject *s)
932{
Tim Petersc9933152001-10-16 20:18:24 +0000933 assert(PyString_Check(s));
934 if (PyString_CheckExact(s)) {
935 Py_INCREF(s);
936 return s;
937 }
938 else {
939 /* Subtype -- return genuine string with the same value. */
940 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis68192102007-07-21 06:55:02 +0000941 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000942 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000943}
944
Martin v. Löwis18e16552006-02-15 17:27:45 +0000945static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000946string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000947{
Martin v. Löwis68192102007-07-21 06:55:02 +0000948 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949}
950
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000951static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000952string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000953{
Andrew Dalke598710c2006-05-25 18:18:39 +0000954 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955 register PyStringObject *op;
956 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000957#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000958 if (PyUnicode_Check(bb))
959 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000960#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000961 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000962 "cannot concatenate 'str' and '%.200s' objects",
Martin v. Löwis68192102007-07-21 06:55:02 +0000963 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000964 return NULL;
965 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000966#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000967 /* Optimize cases with empty left or right operand */
Martin v. Löwis68192102007-07-21 06:55:02 +0000968 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000969 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis68192102007-07-21 06:55:02 +0000970 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000971 Py_INCREF(bb);
972 return bb;
973 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000974 Py_INCREF(a);
975 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000976 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000977 size = Py_Size(a) + Py_Size(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000978 if (size < 0) {
979 PyErr_SetString(PyExc_OverflowError,
980 "strings are too large to concat");
981 return NULL;
982 }
983
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000984 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000985 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000986 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000987 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000988 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000989 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000990 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis68192102007-07-21 06:55:02 +0000991 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
992 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000993 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000995#undef b
996}
997
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000998static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000999string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001000{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001001 register Py_ssize_t i;
1002 register Py_ssize_t j;
1003 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001004 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001005 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001006 if (n < 0)
1007 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001008 /* watch out for overflows: the size can overflow int,
1009 * and the # of bytes needed can overflow size_t
1010 */
Martin v. Löwis68192102007-07-21 06:55:02 +00001011 size = Py_Size(a) * n;
1012 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001013 PyErr_SetString(PyExc_OverflowError,
1014 "repeated string is too long");
1015 return NULL;
1016 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001017 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001018 Py_INCREF(a);
1019 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001020 }
Tim Peterse7c05322004-06-27 17:24:49 +00001021 nbytes = (size_t)size;
1022 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001023 PyErr_SetString(PyExc_OverflowError,
1024 "repeated string is too long");
1025 return NULL;
1026 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001028 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001029 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001030 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001031 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001032 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001033 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001034 op->ob_sval[size] = '\0';
Martin v. Löwis68192102007-07-21 06:55:02 +00001035 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001036 memset(op->ob_sval, a->ob_sval[0] , n);
1037 return (PyObject *) op;
1038 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001039 i = 0;
1040 if (i < size) {
Martin v. Löwis68192102007-07-21 06:55:02 +00001041 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1042 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001043 }
1044 while (i < size) {
1045 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001046 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001047 i += j;
1048 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001049 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050}
1051
1052/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1053
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001054static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001055string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001056 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001057 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001058{
1059 if (i < 0)
1060 i = 0;
1061 if (j < 0)
1062 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis68192102007-07-21 06:55:02 +00001063 if (j > Py_Size(a))
1064 j = Py_Size(a);
1065 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001066 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001067 Py_INCREF(a);
1068 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001069 }
1070 if (j < i)
1071 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001072 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073}
1074
Guido van Rossum9284a572000-03-07 15:53:43 +00001075static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001076string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001077{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001078 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001079#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001080 if (PyUnicode_Check(sub_obj))
1081 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001082#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001083 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001084 PyErr_Format(PyExc_TypeError,
1085 "'in <string>' requires string as left operand, "
Martin v. Löwis68192102007-07-21 06:55:02 +00001086 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001087 return -1;
1088 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001089 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001090
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001091 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001092}
1093
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001094static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001095string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001096{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001097 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001098 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +00001099 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001100 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001101 return NULL;
1102 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001103 pchar = a->ob_sval[i];
1104 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001105 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001106 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001107 else {
1108#ifdef COUNT_ALLOCS
1109 one_strings++;
1110#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001111 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001112 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001113 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001114}
1115
Martin v. Löwiscd353062001-05-24 16:56:35 +00001116static PyObject*
1117string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001118{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001119 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001120 Py_ssize_t len_a, len_b;
1121 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001122 PyObject *result;
1123
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001124 /* Make sure both arguments are strings. */
1125 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001126 result = Py_NotImplemented;
1127 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001128 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001129 if (a == b) {
1130 switch (op) {
1131 case Py_EQ:case Py_LE:case Py_GE:
1132 result = Py_True;
1133 goto out;
1134 case Py_NE:case Py_LT:case Py_GT:
1135 result = Py_False;
1136 goto out;
1137 }
1138 }
1139 if (op == Py_EQ) {
1140 /* Supporting Py_NE here as well does not save
1141 much time, since Py_NE is rarely used. */
Martin v. Löwis68192102007-07-21 06:55:02 +00001142 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001143 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis68192102007-07-21 06:55:02 +00001144 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001145 result = Py_True;
1146 } else {
1147 result = Py_False;
1148 }
1149 goto out;
1150 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001151 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001152 min_len = (len_a < len_b) ? len_a : len_b;
1153 if (min_len > 0) {
1154 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1155 if (c==0)
1156 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001157 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001158 c = 0;
1159 if (c == 0)
1160 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1161 switch (op) {
1162 case Py_LT: c = c < 0; break;
1163 case Py_LE: c = c <= 0; break;
1164 case Py_EQ: assert(0); break; /* unreachable */
1165 case Py_NE: c = c != 0; break;
1166 case Py_GT: c = c > 0; break;
1167 case Py_GE: c = c >= 0; break;
1168 default:
1169 result = Py_NotImplemented;
1170 goto out;
1171 }
1172 result = c ? Py_True : Py_False;
1173 out:
1174 Py_INCREF(result);
1175 return result;
1176}
1177
1178int
1179_PyString_Eq(PyObject *o1, PyObject *o2)
1180{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001181 PyStringObject *a = (PyStringObject*) o1;
1182 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis68192102007-07-21 06:55:02 +00001183 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001184 && *a->ob_sval == *b->ob_sval
Martin v. Löwis68192102007-07-21 06:55:02 +00001185 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001186}
1187
Guido van Rossum9bfef441993-03-29 10:43:31 +00001188static long
Fred Drakeba096332000-07-09 07:04:36 +00001189string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001190{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001191 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001192 register unsigned char *p;
1193 register long x;
1194
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001195 if (a->ob_shash != -1)
1196 return a->ob_shash;
Martin v. Löwis68192102007-07-21 06:55:02 +00001197 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001198 p = (unsigned char *) a->ob_sval;
1199 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001200 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001201 x = (1000003*x) ^ *p++;
Martin v. Löwis68192102007-07-21 06:55:02 +00001202 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001203 if (x == -1)
1204 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001205 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001206 return x;
1207}
1208
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209static PyObject*
1210string_subscript(PyStringObject* self, PyObject* item)
1211{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001212 if (PyIndex_Check(item)) {
1213 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001214 if (i == -1 && PyErr_Occurred())
1215 return NULL;
1216 if (i < 0)
1217 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001218 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219 }
1220 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001221 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001222 char* source_buf;
1223 char* result_buf;
1224 PyObject* result;
1225
Tim Petersae1d0c92006-03-17 03:29:34 +00001226 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001227 PyString_GET_SIZE(self),
1228 &start, &stop, &step, &slicelength) < 0) {
1229 return NULL;
1230 }
1231
1232 if (slicelength <= 0) {
1233 return PyString_FromStringAndSize("", 0);
1234 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001235 else if (start == 0 && step == 1 &&
1236 slicelength == PyString_GET_SIZE(self) &&
1237 PyString_CheckExact(self)) {
1238 Py_INCREF(self);
1239 return (PyObject *)self;
1240 }
1241 else if (step == 1) {
1242 return PyString_FromStringAndSize(
1243 PyString_AS_STRING(self) + start,
1244 slicelength);
1245 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001246 else {
1247 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001248 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001249 if (result_buf == NULL)
1250 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001251
Tim Petersae1d0c92006-03-17 03:29:34 +00001252 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001253 cur += step, i++) {
1254 result_buf[i] = source_buf[cur];
1255 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001256
1257 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001258 slicelength);
1259 PyMem_Free(result_buf);
1260 return result;
1261 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001262 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001263 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001264 PyErr_Format(PyExc_TypeError,
1265 "string indices must be integers, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00001266 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001267 return NULL;
1268 }
1269}
1270
Martin v. Löwis18e16552006-02-15 17:27:45 +00001271static Py_ssize_t
1272string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001273{
1274 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001275 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001276 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001277 return -1;
1278 }
1279 *ptr = (void *)self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001280 return Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001281}
1282
Martin v. Löwis18e16552006-02-15 17:27:45 +00001283static Py_ssize_t
1284string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001285{
Guido van Rossum045e6881997-09-08 18:30:11 +00001286 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001287 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001288 return -1;
1289}
1290
Martin v. Löwis18e16552006-02-15 17:27:45 +00001291static Py_ssize_t
1292string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001293{
1294 if ( lenp )
Martin v. Löwis68192102007-07-21 06:55:02 +00001295 *lenp = Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001296 return 1;
1297}
1298
Martin v. Löwis18e16552006-02-15 17:27:45 +00001299static Py_ssize_t
1300string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001301{
1302 if ( index != 0 ) {
1303 PyErr_SetString(PyExc_SystemError,
1304 "accessing non-existent string segment");
1305 return -1;
1306 }
1307 *ptr = self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001308 return Py_Size(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001309}
1310
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001311static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001312 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001313 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001314 (ssizeargfunc)string_repeat, /*sq_repeat*/
1315 (ssizeargfunc)string_item, /*sq_item*/
1316 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001317 0, /*sq_ass_item*/
1318 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001319 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001320};
1321
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001322static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001323 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001324 (binaryfunc)string_subscript,
1325 0,
1326};
1327
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001328static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001329 (readbufferproc)string_buffer_getreadbuf,
1330 (writebufferproc)string_buffer_getwritebuf,
1331 (segcountproc)string_buffer_getsegcount,
1332 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001333};
1334
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335
1336
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip kind: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)]+3)


/* Compare `length` bytes of target at `offset` against pattern, testing
   the first and last bytes before the middle.
   Don't call if length < 2 (the middle length would go negative). */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The SPLIT_* macros below expand in the calling function's scope: they
   use its locals `str` and `list` (and `count` for SPLIT_ADD) and jump
   to its `onError` label on failure. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Advance (or, for the R* variants, retreat) index i over whitespace or
   non-whitespace bytes of s.  i must be a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1403
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001404Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001405split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406{
Andrew Dalke525eab32006-05-26 14:00:45 +00001407 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001408 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001409 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001410
1411 if (list == NULL)
1412 return NULL;
1413
Andrew Dalke02758d62006-05-26 15:21:01 +00001414 i = j = 0;
1415
1416 while (maxsplit-- > 0) {
1417 SKIP_SPACE(s, i, len);
1418 if (i==len) break;
1419 j = i; i++;
1420 SKIP_NONSPACE(s, i, len);
1421 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001423
1424 if (i < len) {
1425 /* Only occurs when maxsplit was reached */
1426 /* Skip any remaining whitespace and copy to end of string */
1427 SKIP_SPACE(s, i, len);
1428 if (i != len)
1429 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001430 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001431 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001433 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001434 Py_DECREF(list);
1435 return NULL;
1436}
1437
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001438Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001439split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001440{
Andrew Dalke525eab32006-05-26 14:00:45 +00001441 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001443 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001444
1445 if (list == NULL)
1446 return NULL;
1447
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001448 i = j = 0;
1449 while ((j < len) && (maxcount-- > 0)) {
1450 for(; j<len; j++) {
1451 /* I found that using memchr makes no difference */
1452 if (s[j] == ch) {
1453 SPLIT_ADD(s, i, j);
1454 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001455 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001456 }
1457 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001458 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001459 if (i <= len) {
1460 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001461 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 return list;
1464
1465 onError:
1466 Py_DECREF(list);
1467 return NULL;
1468}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001470PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471"S.split([sep [,maxsplit]]) -> list of strings\n\
1472\n\
1473Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001474delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001475splits are done. If sep is not specified or is None, any\n\
1476whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001477
1478static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001479string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001480{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001481 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001482 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001483 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001484 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001485#ifdef USE_FAST
1486 Py_ssize_t pos;
1487#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488
Martin v. Löwis9c830762006-04-13 08:37:17 +00001489 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001491 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001492 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001493 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001495 if (PyString_Check(subobj)) {
1496 sub = PyString_AS_STRING(subobj);
1497 n = PyString_GET_SIZE(subobj);
1498 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001499#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001500 else if (PyUnicode_Check(subobj))
1501 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001502#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001503 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1504 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001505
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 if (n == 0) {
1507 PyErr_SetString(PyExc_ValueError, "empty separator");
1508 return NULL;
1509 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001510 else if (n == 1)
1511 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001512
Andrew Dalke525eab32006-05-26 14:00:45 +00001513 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514 if (list == NULL)
1515 return NULL;
1516
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001517#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001518 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001519 while (maxsplit-- > 0) {
1520 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1521 if (pos < 0)
1522 break;
1523 j = i+pos;
1524 SPLIT_ADD(s, i, j);
1525 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001526 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001527#else
1528 i = j = 0;
1529 while ((j+n <= len) && (maxsplit-- > 0)) {
1530 for (; j+n <= len; j++) {
1531 if (Py_STRING_MATCH(s, j, sub, n)) {
1532 SPLIT_ADD(s, i, j);
1533 i = j = j + n;
1534 break;
1535 }
1536 }
1537 }
1538#endif
1539 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001540 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001541 return list;
1542
Andrew Dalke525eab32006-05-26 14:00:45 +00001543 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001544 Py_DECREF(list);
1545 return NULL;
1546}
1547
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001548PyDoc_STRVAR(partition__doc__,
1549"S.partition(sep) -> (head, sep, tail)\n\
1550\n\
1551Searches for the separator sep in S, and returns the part before it,\n\
1552the separator itself, and the part after it. If the separator is not\n\
1553found, returns S and two empty strings.");
1554
1555static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001556string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001557{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001558 const char *sep;
1559 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001560
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001561 if (PyString_Check(sep_obj)) {
1562 sep = PyString_AS_STRING(sep_obj);
1563 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001564 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001565#ifdef Py_USING_UNICODE
1566 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001567 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001568#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001569 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001570 return NULL;
1571
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001572 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001573 (PyObject*) self,
1574 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1575 sep_obj, sep, sep_len
1576 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001577}
1578
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001579PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001580"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001581\n\
1582Searches for the separator sep in S, starting at the end of S, and returns\n\
1583the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001584separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001585
1586static PyObject *
1587string_rpartition(PyStringObject *self, PyObject *sep_obj)
1588{
1589 const char *sep;
1590 Py_ssize_t sep_len;
1591
1592 if (PyString_Check(sep_obj)) {
1593 sep = PyString_AS_STRING(sep_obj);
1594 sep_len = PyString_GET_SIZE(sep_obj);
1595 }
1596#ifdef Py_USING_UNICODE
1597 else if (PyUnicode_Check(sep_obj))
1598 return PyUnicode_Partition((PyObject *) self, sep_obj);
1599#endif
1600 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1601 return NULL;
1602
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001603 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001604 (PyObject*) self,
1605 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1606 sep_obj, sep, sep_len
1607 );
1608}
1609
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001610Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001611rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001612{
Andrew Dalke525eab32006-05-26 14:00:45 +00001613 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001614 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001615 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001616
1617 if (list == NULL)
1618 return NULL;
1619
Andrew Dalke02758d62006-05-26 15:21:01 +00001620 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001621
Andrew Dalke02758d62006-05-26 15:21:01 +00001622 while (maxsplit-- > 0) {
1623 RSKIP_SPACE(s, i);
1624 if (i<0) break;
1625 j = i; i--;
1626 RSKIP_NONSPACE(s, i);
1627 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001628 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001629 if (i >= 0) {
1630 /* Only occurs when maxsplit was reached */
1631 /* Skip any remaining whitespace and copy to beginning of string */
1632 RSKIP_SPACE(s, i);
1633 if (i >= 0)
1634 SPLIT_ADD(s, 0, i + 1);
1635
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001636 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001637 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001638 if (PyList_Reverse(list) < 0)
1639 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001640 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001641 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001642 Py_DECREF(list);
1643 return NULL;
1644}
1645
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001646Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001647rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001648{
Andrew Dalke525eab32006-05-26 14:00:45 +00001649 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001650 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001651 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001652
1653 if (list == NULL)
1654 return NULL;
1655
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001656 i = j = len - 1;
1657 while ((i >= 0) && (maxcount-- > 0)) {
1658 for (; i >= 0; i--) {
1659 if (s[i] == ch) {
1660 SPLIT_ADD(s, i + 1, j + 1);
1661 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001662 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001663 }
1664 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001665 }
1666 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001667 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001668 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001669 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001670 if (PyList_Reverse(list) < 0)
1671 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001672 return list;
1673
1674 onError:
1675 Py_DECREF(list);
1676 return NULL;
1677}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678
1679PyDoc_STRVAR(rsplit__doc__,
1680"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1681\n\
1682Return a list of the words in the string S, using sep as the\n\
1683delimiter string, starting at the end of the string and working\n\
1684to the front. If maxsplit is given, at most maxsplit splits are\n\
1685done. If sep is not specified or is None, any whitespace string\n\
1686is a separator.");
1687
1688static PyObject *
1689string_rsplit(PyStringObject *self, PyObject *args)
1690{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001691 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001692 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001693 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001694 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001695
Martin v. Löwis9c830762006-04-13 08:37:17 +00001696 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001697 return NULL;
1698 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001699 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700 if (subobj == Py_None)
1701 return rsplit_whitespace(s, len, maxsplit);
1702 if (PyString_Check(subobj)) {
1703 sub = PyString_AS_STRING(subobj);
1704 n = PyString_GET_SIZE(subobj);
1705 }
1706#ifdef Py_USING_UNICODE
1707 else if (PyUnicode_Check(subobj))
1708 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1709#endif
1710 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1711 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001712
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001713 if (n == 0) {
1714 PyErr_SetString(PyExc_ValueError, "empty separator");
1715 return NULL;
1716 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001717 else if (n == 1)
1718 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001719
Andrew Dalke525eab32006-05-26 14:00:45 +00001720 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001721 if (list == NULL)
1722 return NULL;
1723
1724 j = len;
1725 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001726
1727 while ( (i >= 0) && (maxsplit-- > 0) ) {
1728 for (; i>=0; i--) {
1729 if (Py_STRING_MATCH(s, i, sub, n)) {
1730 SPLIT_ADD(s, i + n, j);
1731 j = i;
1732 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001733 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001734 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001735 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001736 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001737 SPLIT_ADD(s, 0, j);
1738 FIX_PREALLOC_SIZE(list);
1739 if (PyList_Reverse(list) < 0)
1740 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001741 return list;
1742
Andrew Dalke525eab32006-05-26 14:00:45 +00001743onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001744 Py_DECREF(list);
1745 return NULL;
1746}
1747
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001749PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750"S.join(sequence) -> string\n\
1751\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001753sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754
1755static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001756string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757{
1758 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001759 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001762 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001763 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001764 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001765 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766
Tim Peters19fe14e2001-01-19 03:03:47 +00001767 seq = PySequence_Fast(orig, "");
1768 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001769 return NULL;
1770 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001771
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001772 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001773 if (seqlen == 0) {
1774 Py_DECREF(seq);
1775 return PyString_FromString("");
1776 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001779 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1780 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001781 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001782 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001783 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001785
Raymond Hettinger674f2412004-08-23 23:23:54 +00001786 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001787 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001788 * Do a pre-pass to figure out the total amount of space we'll
1789 * need (sz), see whether any argument is absurd, and defer to
1790 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001791 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001792 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001793 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001794 item = PySequence_Fast_GET_ITEM(seq, i);
1795 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001796#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001797 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001798 /* Defer to Unicode join.
1799 * CAUTION: There's no gurantee that the
1800 * original sequence can be iterated over
1801 * again, so we must pass seq here.
1802 */
1803 PyObject *result;
1804 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001805 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001806 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001807 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001808#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001809 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001810 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001811 " %.80s found",
Martin v. Löwis68192102007-07-21 06:55:02 +00001812 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001813 Py_DECREF(seq);
1814 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001815 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001816 sz += PyString_GET_SIZE(item);
1817 if (i != 0)
1818 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001819 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001820 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001821 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001822 Py_DECREF(seq);
1823 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001825 }
1826
1827 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001828 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001829 if (res == NULL) {
1830 Py_DECREF(seq);
1831 return NULL;
1832 }
1833
1834 /* Catenate everything. */
1835 p = PyString_AS_STRING(res);
1836 for (i = 0; i < seqlen; ++i) {
1837 size_t n;
1838 item = PySequence_Fast_GET_ITEM(seq, i);
1839 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001840 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001841 p += n;
1842 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001843 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001844 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001845 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001847
Jeremy Hylton49048292000-07-11 03:28:17 +00001848 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850}
1851
Tim Peters52e155e2001-06-16 05:42:57 +00001852PyObject *
1853_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001854{
Tim Petersa7259592001-06-16 05:11:17 +00001855 assert(sep != NULL && PyString_Check(sep));
1856 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001857 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001858}
1859
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001860Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001861string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001862{
1863 if (*end > len)
1864 *end = len;
1865 else if (*end < 0)
1866 *end += len;
1867 if (*end < 0)
1868 *end = 0;
1869 if (*start < 0)
1870 *start += len;
1871 if (*start < 0)
1872 *start = 0;
1873}
1874
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001875Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001876string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001878 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001879 const char *sub;
1880 Py_ssize_t sub_len;
1881 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001883 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1884 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885 return -2;
1886 if (PyString_Check(subobj)) {
1887 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001888 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001889 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001890#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001891 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001892 return PyUnicode_Find(
1893 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001894#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001895 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001896 /* XXX - the "expected a character buffer object" is pretty
1897 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898 return -2;
1899
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001900 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001901 return stringlib_find_slice(
1902 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1903 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001904 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001905 return stringlib_rfind_slice(
1906 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1907 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908}
1909
1910
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001911PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912"S.find(sub [,start [,end]]) -> int\n\
1913\n\
1914Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001915such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916arguments start and end are interpreted as in slice notation.\n\
1917\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001918Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919
1920static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001921string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001923 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 if (result == -2)
1925 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001926 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927}
1928
1929
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001930PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931"S.index(sub [,start [,end]]) -> int\n\
1932\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001933Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934
1935static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001936string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001938 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939 if (result == -2)
1940 return NULL;
1941 if (result == -1) {
1942 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001943 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 return NULL;
1945 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001946 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947}
1948
1949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001950PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951"S.rfind(sub [,start [,end]]) -> int\n\
1952\n\
1953Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001954such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955arguments start and end are interpreted as in slice notation.\n\
1956\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001957Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958
1959static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001960string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001962 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963 if (result == -2)
1964 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001965 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966}
1967
1968
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001969PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970"S.rindex(sub [,start [,end]]) -> int\n\
1971\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001972Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973
1974static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001975string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001977 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978 if (result == -2)
1979 return NULL;
1980 if (result == -1) {
1981 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001982 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983 return NULL;
1984 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001985 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986}
1987
1988
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001989Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001990do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1991{
1992 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001993 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001994 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001995 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1996 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001997
1998 i = 0;
1999 if (striptype != RIGHTSTRIP) {
2000 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2001 i++;
2002 }
2003 }
2004
2005 j = len;
2006 if (striptype != LEFTSTRIP) {
2007 do {
2008 j--;
2009 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2010 j++;
2011 }
2012
2013 if (i == 0 && j == len && PyString_CheckExact(self)) {
2014 Py_INCREF(self);
2015 return (PyObject*)self;
2016 }
2017 else
2018 return PyString_FromStringAndSize(s+i, j-i);
2019}
2020
2021
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002022Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002023do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024{
2025 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002026 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028 i = 0;
2029 if (striptype != RIGHTSTRIP) {
2030 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2031 i++;
2032 }
2033 }
2034
2035 j = len;
2036 if (striptype != LEFTSTRIP) {
2037 do {
2038 j--;
2039 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2040 j++;
2041 }
2042
Tim Peters8fa5dd02001-09-12 02:18:30 +00002043 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044 Py_INCREF(self);
2045 return (PyObject*)self;
2046 }
2047 else
2048 return PyString_FromStringAndSize(s+i, j-i);
2049}
2050
2051
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002052Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002053do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2054{
2055 PyObject *sep = NULL;
2056
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002057 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002058 return NULL;
2059
2060 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002061 if (PyString_Check(sep))
2062 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002063#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002064 else if (PyUnicode_Check(sep)) {
2065 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2066 PyObject *res;
2067 if (uniself==NULL)
2068 return NULL;
2069 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2070 striptype, sep);
2071 Py_DECREF(uniself);
2072 return res;
2073 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002074#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002075 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002076#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002077 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002078#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002079 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002080#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002081 STRIPNAME(striptype));
2082 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002083 }
2084
2085 return do_strip(self, striptype);
2086}
2087
2088
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002089PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002090"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091\n\
2092Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002093whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002094If chars is given and not None, remove characters in chars instead.\n\
2095If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096
2097static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002098string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002100 if (PyTuple_GET_SIZE(args) == 0)
2101 return do_strip(self, BOTHSTRIP); /* Common case */
2102 else
2103 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104}
2105
2106
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002107PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002108"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002110Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002111If chars is given and not None, remove characters in chars instead.\n\
2112If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113
2114static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002115string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002117 if (PyTuple_GET_SIZE(args) == 0)
2118 return do_strip(self, LEFTSTRIP); /* Common case */
2119 else
2120 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121}
2122
2123
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002124PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002125"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002127Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002128If chars is given and not None, remove characters in chars instead.\n\
2129If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130
2131static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002132string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002134 if (PyTuple_GET_SIZE(args) == 0)
2135 return do_strip(self, RIGHTSTRIP); /* Common case */
2136 else
2137 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138}
2139
2140
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002141PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142"S.lower() -> string\n\
2143\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002144Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002146/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2147#ifndef _tolower
2148#define _tolower tolower
2149#endif
2150
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002152string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002154 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002155 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002156 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002158 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002159 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002161
2162 s = PyString_AS_STRING(newobj);
2163
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002164 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002165
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002167 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002168 if (isupper(c))
2169 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002171
Anthony Baxtera6286212006-04-11 07:42:36 +00002172 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173}
2174
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002175PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176"S.upper() -> string\n\
2177\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002178Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002180#ifndef _toupper
2181#define _toupper toupper
2182#endif
2183
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002185string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002187 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002188 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002189 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002191 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002192 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002194
2195 s = PyString_AS_STRING(newobj);
2196
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002197 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002198
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002200 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002201 if (islower(c))
2202 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002204
Anthony Baxtera6286212006-04-11 07:42:36 +00002205 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002206}
2207
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002208PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002209"S.title() -> string\n\
2210\n\
2211Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002212characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002213
2214static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002215string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002216{
2217 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002218 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002219 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002220 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002221
Anthony Baxtera6286212006-04-11 07:42:36 +00002222 newobj = PyString_FromStringAndSize(NULL, n);
2223 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002224 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002225 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226 for (i = 0; i < n; i++) {
2227 int c = Py_CHARMASK(*s++);
2228 if (islower(c)) {
2229 if (!previous_is_cased)
2230 c = toupper(c);
2231 previous_is_cased = 1;
2232 } else if (isupper(c)) {
2233 if (previous_is_cased)
2234 c = tolower(c);
2235 previous_is_cased = 1;
2236 } else
2237 previous_is_cased = 0;
2238 *s_new++ = c;
2239 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002240 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241}
2242
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002243PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244"S.capitalize() -> string\n\
2245\n\
2246Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002247capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248
2249static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002250string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251{
2252 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002253 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002254 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255
Anthony Baxtera6286212006-04-11 07:42:36 +00002256 newobj = PyString_FromStringAndSize(NULL, n);
2257 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002259 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260 if (0 < n) {
2261 int c = Py_CHARMASK(*s++);
2262 if (islower(c))
2263 *s_new = toupper(c);
2264 else
2265 *s_new = c;
2266 s_new++;
2267 }
2268 for (i = 1; i < n; i++) {
2269 int c = Py_CHARMASK(*s++);
2270 if (isupper(c))
2271 *s_new = tolower(c);
2272 else
2273 *s_new = c;
2274 s_new++;
2275 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002276 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277}
2278
2279
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002280PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281"S.count(sub[, start[, end]]) -> int\n\
2282\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002283Return the number of non-overlapping occurrences of substring sub in\n\
2284string S[start:end]. Optional arguments start and end are interpreted\n\
2285as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286
2287static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002288string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002289{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002290 PyObject *sub_obj;
2291 const char *str = PyString_AS_STRING(self), *sub;
2292 Py_ssize_t sub_len;
2293 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002295 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2296 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002297 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002298
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002299 if (PyString_Check(sub_obj)) {
2300 sub = PyString_AS_STRING(sub_obj);
2301 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002303#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002304 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002305 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002306 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002307 if (count == -1)
2308 return NULL;
2309 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002310 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002311 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002312#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002313 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002314 return NULL;
2315
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002316 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002317
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002318 return PyInt_FromSsize_t(
2319 stringlib_count(str + start, end - start, sub, sub_len)
2320 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321}
2322
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002323PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324"S.swapcase() -> string\n\
2325\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002327converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328
2329static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002330string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331{
2332 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002333 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002334 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335
Anthony Baxtera6286212006-04-11 07:42:36 +00002336 newobj = PyString_FromStringAndSize(NULL, n);
2337 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002339 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 for (i = 0; i < n; i++) {
2341 int c = Py_CHARMASK(*s++);
2342 if (islower(c)) {
2343 *s_new = toupper(c);
2344 }
2345 else if (isupper(c)) {
2346 *s_new = tolower(c);
2347 }
2348 else
2349 *s_new = c;
2350 s_new++;
2351 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002352 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353}
2354
2355
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002356PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357"S.translate(table [,deletechars]) -> string\n\
2358\n\
2359Return a copy of the string S, where all characters occurring\n\
2360in the optional argument deletechars are removed, and the\n\
2361remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002362translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002363
2364static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002365string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002368 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002369 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002371 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002372 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373 PyObject *result;
2374 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002375 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002377 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380
2381 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002382 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383 tablen = PyString_GET_SIZE(tableobj);
2384 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002385 else if (tableobj == Py_None) {
2386 table = NULL;
2387 tablen = 256;
2388 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002389#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002391 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392 parameter; instead a mapping to None will cause characters
2393 to be deleted. */
2394 if (delobj != NULL) {
2395 PyErr_SetString(PyExc_TypeError,
2396 "deletions are implemented differently for unicode");
2397 return NULL;
2398 }
2399 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2400 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002401#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002402 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002403 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404
Martin v. Löwis00b61272002-12-12 20:03:19 +00002405 if (tablen != 256) {
2406 PyErr_SetString(PyExc_ValueError,
2407 "translation table must be 256 characters long");
2408 return NULL;
2409 }
2410
Guido van Rossum4c08d552000-03-10 22:55:18 +00002411 if (delobj != NULL) {
2412 if (PyString_Check(delobj)) {
2413 del_table = PyString_AS_STRING(delobj);
2414 dellen = PyString_GET_SIZE(delobj);
2415 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002416#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002417 else if (PyUnicode_Check(delobj)) {
2418 PyErr_SetString(PyExc_TypeError,
2419 "deletions are implemented differently for unicode");
2420 return NULL;
2421 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002422#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002423 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2424 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002425 }
2426 else {
2427 del_table = NULL;
2428 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 }
2430
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002431 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002432 result = PyString_FromStringAndSize((char *)NULL, inlen);
2433 if (result == NULL)
2434 return NULL;
2435 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002436 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002438 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002439 /* If no deletions are required, use faster code */
2440 for (i = inlen; --i >= 0; ) {
2441 c = Py_CHARMASK(*input++);
2442 if (Py_CHARMASK((*output++ = table[c])) != c)
2443 changed = 1;
2444 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002445 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446 return result;
2447 Py_DECREF(result);
2448 Py_INCREF(input_obj);
2449 return input_obj;
2450 }
2451
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002452 if (table == NULL) {
2453 for (i = 0; i < 256; i++)
2454 trans_table[i] = Py_CHARMASK(i);
2455 } else {
2456 for (i = 0; i < 256; i++)
2457 trans_table[i] = Py_CHARMASK(table[i]);
2458 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002459
2460 for (i = 0; i < dellen; i++)
2461 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2462
2463 for (i = inlen; --i >= 0; ) {
2464 c = Py_CHARMASK(*input++);
2465 if (trans_table[c] != -1)
2466 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2467 continue;
2468 changed = 1;
2469 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002470 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002471 Py_DECREF(result);
2472 Py_INCREF(input_obj);
2473 return input_obj;
2474 }
2475 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002476 if (inlen > 0)
2477 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002478 return result;
2479}
2480
2481
/* Direction flags for substring searches (e.g. the direction argument of
   findstring()). */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Return a char * to the first occurrence of byte c within the first
   target_len bytes of target, or NULL if it does not occur (memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2489
2490/* String ops must return a string. */
2491/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002492Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002493return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002494{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002495 if (PyString_CheckExact(self)) {
2496 Py_INCREF(self);
2497 return self;
2498 }
2499 return (PyStringObject *)PyString_FromStringAndSize(
2500 PyString_AS_STRING(self),
2501 PyString_GET_SIZE(self));
2502}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002503
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002504Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002505countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002506{
2507 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002508 const char *start=target;
2509 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002510
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002511 while ( (start=findchar(start, end-start, c)) != NULL ) {
2512 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002513 if (count >= maxcount)
2514 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002515 start += 1;
2516 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002517 return count;
2518}
2519
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002520Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002521findstring(const char *target, Py_ssize_t target_len,
2522 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002523 Py_ssize_t start,
2524 Py_ssize_t end,
2525 int direction)
2526{
2527 if (start < 0) {
2528 start += target_len;
2529 if (start < 0)
2530 start = 0;
2531 }
2532 if (end > target_len) {
2533 end = target_len;
2534 } else if (end < 0) {
2535 end += target_len;
2536 if (end < 0)
2537 end = 0;
2538 }
2539
2540 /* zero-length substrings always match at the first attempt */
2541 if (pattern_len == 0)
2542 return (direction > 0) ? start : end;
2543
2544 end -= pattern_len;
2545
2546 if (direction < 0) {
2547 for (; end >= start; end--)
2548 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2549 return end;
2550 } else {
2551 for (; start <= end; start++)
2552 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2553 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002554 }
2555 return -1;
2556}
2557
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002558Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002559countstring(const char *target, Py_ssize_t target_len,
2560 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002561 Py_ssize_t start,
2562 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002563 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002564{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002565 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002567 if (start < 0) {
2568 start += target_len;
2569 if (start < 0)
2570 start = 0;
2571 }
2572 if (end > target_len) {
2573 end = target_len;
2574 } else if (end < 0) {
2575 end += target_len;
2576 if (end < 0)
2577 end = 0;
2578 }
2579
2580 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002581 if (pattern_len == 0 || maxcount == 0) {
2582 if (target_len+1 < maxcount)
2583 return target_len+1;
2584 return maxcount;
2585 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002586
2587 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002588 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002589 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002590 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2591 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002592 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002593 end -= pattern_len-1;
2594 }
2595 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002596 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002597 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2598 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002599 if (--maxcount <= 0)
2600 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002601 start += pattern_len-1;
2602 }
2603 }
2604 return count;
2605}
2606
2607
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002608/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002609
2610/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002611Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002612replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002613 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002614 Py_ssize_t maxcount)
2615{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002616 char *self_s, *result_s;
2617 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002618 Py_ssize_t count, i, product;
2619 PyStringObject *result;
2620
2621 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002622
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002623 /* 1 at the end plus 1 after every character */
2624 count = self_len+1;
2625 if (maxcount < count)
2626 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002627
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002628 /* Check for overflow */
2629 /* result_len = count * to_len + self_len; */
2630 product = count * to_len;
2631 if (product / to_len != count) {
2632 PyErr_SetString(PyExc_OverflowError,
2633 "replace string is too long");
2634 return NULL;
2635 }
2636 result_len = product + self_len;
2637 if (result_len < 0) {
2638 PyErr_SetString(PyExc_OverflowError,
2639 "replace string is too long");
2640 return NULL;
2641 }
2642
2643 if (! (result = (PyStringObject *)
2644 PyString_FromStringAndSize(NULL, result_len)) )
2645 return NULL;
2646
2647 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002648 result_s = PyString_AS_STRING(result);
2649
2650 /* TODO: special case single character, which doesn't need memcpy */
2651
2652 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002653 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002654 result_s += to_len;
2655 count -= 1;
2656
2657 for (i=0; i<count; i++) {
2658 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002659 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002660 result_s += to_len;
2661 }
2662
2663 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002664 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002665
2666 return result;
2667}
2668
2669/* Special case for deleting a single character */
2670/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002671Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002672replace_delete_single_character(PyStringObject *self,
2673 char from_c, Py_ssize_t maxcount)
2674{
2675 char *self_s, *result_s;
2676 char *start, *next, *end;
2677 Py_ssize_t self_len, result_len;
2678 Py_ssize_t count;
2679 PyStringObject *result;
2680
2681 self_len = PyString_GET_SIZE(self);
2682 self_s = PyString_AS_STRING(self);
2683
Andrew Dalke51324072006-05-26 20:25:22 +00002684 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002685 if (count == 0) {
2686 return return_self(self);
2687 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002688
2689 result_len = self_len - count; /* from_len == 1 */
2690 assert(result_len>=0);
2691
2692 if ( (result = (PyStringObject *)
2693 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2694 return NULL;
2695 result_s = PyString_AS_STRING(result);
2696
2697 start = self_s;
2698 end = self_s + self_len;
2699 while (count-- > 0) {
2700 next = findchar(start, end-start, from_c);
2701 if (next == NULL)
2702 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002703 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002704 result_s += (next-start);
2705 start = next+1;
2706 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002707 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002708
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002709 return result;
2710}
2711
2712/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2713
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002714Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002715replace_delete_substring(PyStringObject *self,
2716 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002717 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002718 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002719 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002720 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002721 Py_ssize_t count, offset;
2722 PyStringObject *result;
2723
2724 self_len = PyString_GET_SIZE(self);
2725 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002726
2727 count = countstring(self_s, self_len,
2728 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002729 0, self_len, 1,
2730 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002731
2732 if (count == 0) {
2733 /* no matches */
2734 return return_self(self);
2735 }
2736
2737 result_len = self_len - (count * from_len);
2738 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002739
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002740 if ( (result = (PyStringObject *)
2741 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2742 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002743
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002745
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002746 start = self_s;
2747 end = self_s + self_len;
2748 while (count-- > 0) {
2749 offset = findstring(start, end-start,
2750 from_s, from_len,
2751 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002752 if (offset == -1)
2753 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002754 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002755
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002756 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002757
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002758 result_s += (next-start);
2759 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002760 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002761 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002762 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002763}
2764
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002766Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002767replace_single_character_in_place(PyStringObject *self,
2768 char from_c, char to_c,
2769 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002770{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771 char *self_s, *result_s, *start, *end, *next;
2772 Py_ssize_t self_len;
2773 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002774
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002775 /* The result string will be the same size */
2776 self_s = PyString_AS_STRING(self);
2777 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002778
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002779 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002780
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002781 if (next == NULL) {
2782 /* No matches; return the original string */
2783 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002784 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002785
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002786 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002787 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788 if (result == NULL)
2789 return NULL;
2790 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002791 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002792
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002793 /* change everything in-place, starting with this one */
2794 start = result_s + (next-self_s);
2795 *start = to_c;
2796 start++;
2797 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002798
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002799 while (--maxcount > 0) {
2800 next = findchar(start, end-start, from_c);
2801 if (next == NULL)
2802 break;
2803 *next = to_c;
2804 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002805 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002806
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002807 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002808}
2809
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002811Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002812replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002813 const char *from_s, Py_ssize_t from_len,
2814 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002815 Py_ssize_t maxcount)
2816{
2817 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002818 char *self_s;
2819 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002820 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002821
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002822 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002823
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002824 self_s = PyString_AS_STRING(self);
2825 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002826
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 offset = findstring(self_s, self_len,
2828 from_s, from_len,
2829 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002830 if (offset == -1) {
2831 /* No matches; return the original string */
2832 return return_self(self);
2833 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002834
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002835 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002836 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002837 if (result == NULL)
2838 return NULL;
2839 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002840 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002841
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002842 /* change everything in-place, starting with this one */
2843 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002844 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002845 start += from_len;
2846 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002847
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002848 while ( --maxcount > 0) {
2849 offset = findstring(start, end-start,
2850 from_s, from_len,
2851 0, end-start, FORWARD);
2852 if (offset==-1)
2853 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002854 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002855 start += offset+from_len;
2856 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002857
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002858 return result;
2859}
2860
2861/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002862Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002863replace_single_character(PyStringObject *self,
2864 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002865 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002866 Py_ssize_t maxcount)
2867{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002868 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002869 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002870 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002871 Py_ssize_t count, product;
2872 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002873
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002874 self_s = PyString_AS_STRING(self);
2875 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002876
Andrew Dalke51324072006-05-26 20:25:22 +00002877 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002878 if (count == 0) {
2879 /* no matches, return unchanged */
2880 return return_self(self);
2881 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002882
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002883 /* use the difference between current and new, hence the "-1" */
2884 /* result_len = self_len + count * (to_len-1) */
2885 product = count * (to_len-1);
2886 if (product / (to_len-1) != count) {
2887 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2888 return NULL;
2889 }
2890 result_len = self_len + product;
2891 if (result_len < 0) {
2892 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2893 return NULL;
2894 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002895
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002896 if ( (result = (PyStringObject *)
2897 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2898 return NULL;
2899 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002900
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002901 start = self_s;
2902 end = self_s + self_len;
2903 while (count-- > 0) {
2904 next = findchar(start, end-start, from_c);
2905 if (next == NULL)
2906 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002907
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908 if (next == start) {
2909 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002910 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002911 result_s += to_len;
2912 start += 1;
2913 } else {
2914 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002915 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002916 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002917 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002918 result_s += to_len;
2919 start = next+1;
2920 }
2921 }
2922 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002923 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002924
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002925 return result;
2926}
2927
2928/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002929Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002930replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002931 const char *from_s, Py_ssize_t from_len,
2932 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002933 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002934 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002935 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002936 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002937 Py_ssize_t count, offset, product;
2938 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002939
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002940 self_s = PyString_AS_STRING(self);
2941 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002942
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002943 count = countstring(self_s, self_len,
2944 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002945 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002946 if (count == 0) {
2947 /* no matches, return unchanged */
2948 return return_self(self);
2949 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002950
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002951 /* Check for overflow */
2952 /* result_len = self_len + count * (to_len-from_len) */
2953 product = count * (to_len-from_len);
2954 if (product / (to_len-from_len) != count) {
2955 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2956 return NULL;
2957 }
2958 result_len = self_len + product;
2959 if (result_len < 0) {
2960 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2961 return NULL;
2962 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002963
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002964 if ( (result = (PyStringObject *)
2965 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2966 return NULL;
2967 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002968
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002969 start = self_s;
2970 end = self_s + self_len;
2971 while (count-- > 0) {
2972 offset = findstring(start, end-start,
2973 from_s, from_len,
2974 0, end-start, FORWARD);
2975 if (offset == -1)
2976 break;
2977 next = start+offset;
2978 if (next == start) {
2979 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002980 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002981 result_s += to_len;
2982 start += from_len;
2983 } else {
2984 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002985 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002986 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002987 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002988 result_s += to_len;
2989 start = next+from_len;
2990 }
2991 }
2992 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002993 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002994
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002995 return result;
2996}
2997
2998
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002999Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003000replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003001 const char *from_s, Py_ssize_t from_len,
3002 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003003 Py_ssize_t maxcount)
3004{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003005 if (maxcount < 0) {
3006 maxcount = PY_SSIZE_T_MAX;
3007 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3008 /* nothing to do; return the original string */
3009 return return_self(self);
3010 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003011
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003012 if (maxcount == 0 ||
3013 (from_len == 0 && to_len == 0)) {
3014 /* nothing to do; return the original string */
3015 return return_self(self);
3016 }
3017
3018 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003019
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003020 if (from_len == 0) {
3021 /* insert the 'to' string everywhere. */
3022 /* >>> "Python".replace("", ".") */
3023 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003024 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003025 }
3026
3027 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3028 /* point for an empty self string to generate a non-empty string */
3029 /* Special case so the remaining code always gets a non-empty string */
3030 if (PyString_GET_SIZE(self) == 0) {
3031 return return_self(self);
3032 }
3033
3034 if (to_len == 0) {
3035 /* delete all occurances of 'from' string */
3036 if (from_len == 1) {
3037 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003038 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003039 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003040 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003041 }
3042 }
3043
3044 /* Handle special case where both strings have the same length */
3045
3046 if (from_len == to_len) {
3047 if (from_len == 1) {
3048 return replace_single_character_in_place(
3049 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003050 from_s[0],
3051 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003052 maxcount);
3053 } else {
3054 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003055 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003056 }
3057 }
3058
3059 /* Otherwise use the more generic algorithms */
3060 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003061 return replace_single_character(self, from_s[0],
3062 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003063 } else {
3064 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003065 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003066 }
3067}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003068
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003069PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003070"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003071\n\
3072Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003073old replaced by new. If the optional argument count is\n\
3074given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003075
3076static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003077string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003078{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003079 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003080 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003081 const char *from_s, *to_s;
3082 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003083
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003084 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003085 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003087 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003088 from_s = PyString_AS_STRING(from);
3089 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003091#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003092 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003093 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003094 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003095#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003096 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003097 return NULL;
3098
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003099 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003100 to_s = PyString_AS_STRING(to);
3101 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003102 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003103#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003104 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003105 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003106 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003107#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003108 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003109 return NULL;
3110
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003111 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003112 from_s, from_len,
3113 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003114}
3115
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003116/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003117
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003118/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003119 * against substr, using the start and end arguments. Returns
3120 * -1 on error, 0 if not found and 1 if found.
3121 */
3122Py_LOCAL(int)
3123_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3124 Py_ssize_t end, int direction)
3125{
3126 Py_ssize_t len = PyString_GET_SIZE(self);
3127 Py_ssize_t slen;
3128 const char* sub;
3129 const char* str;
3130
3131 if (PyString_Check(substr)) {
3132 sub = PyString_AS_STRING(substr);
3133 slen = PyString_GET_SIZE(substr);
3134 }
3135#ifdef Py_USING_UNICODE
3136 else if (PyUnicode_Check(substr))
3137 return PyUnicode_Tailmatch((PyObject *)self,
3138 substr, start, end, direction);
3139#endif
3140 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3141 return -1;
3142 str = PyString_AS_STRING(self);
3143
3144 string_adjust_indices(&start, &end, len);
3145
3146 if (direction < 0) {
3147 /* startswith */
3148 if (start+slen > len)
3149 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003150 } else {
3151 /* endswith */
3152 if (end-start < slen || start > len)
3153 return 0;
3154
3155 if (end-slen > start)
3156 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003157 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003158 if (end-start >= slen)
3159 return ! memcmp(str+start, sub, slen);
3160 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003161}
3162
3163
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003164PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003165"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003166\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003167Return True if S starts with the specified prefix, False otherwise.\n\
3168With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003169With optional end, stop comparing S at that position.\n\
3170prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003171
3172static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003173string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003174{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003175 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003176 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003177 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003178 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003179
Guido van Rossumc6821402000-05-08 14:08:05 +00003180 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3181 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003183 if (PyTuple_Check(subobj)) {
3184 Py_ssize_t i;
3185 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3186 result = _string_tailmatch(self,
3187 PyTuple_GET_ITEM(subobj, i),
3188 start, end, -1);
3189 if (result == -1)
3190 return NULL;
3191 else if (result) {
3192 Py_RETURN_TRUE;
3193 }
3194 }
3195 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196 }
Georg Brandl24250812006-06-09 18:45:48 +00003197 result = _string_tailmatch(self, subobj, start, end, -1);
3198 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003199 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003200 else
Georg Brandl24250812006-06-09 18:45:48 +00003201 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003202}
3203
3204
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003205PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003206"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003207\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003208Return True if S ends with the specified suffix, False otherwise.\n\
3209With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003210With optional end, stop comparing S at that position.\n\
3211suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003212
3213static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003214string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003215{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003216 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003217 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003218 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003219 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003220
Guido van Rossumc6821402000-05-08 14:08:05 +00003221 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3222 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003223 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003224 if (PyTuple_Check(subobj)) {
3225 Py_ssize_t i;
3226 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3227 result = _string_tailmatch(self,
3228 PyTuple_GET_ITEM(subobj, i),
3229 start, end, +1);
3230 if (result == -1)
3231 return NULL;
3232 else if (result) {
3233 Py_RETURN_TRUE;
3234 }
3235 }
3236 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003237 }
Georg Brandl24250812006-06-09 18:45:48 +00003238 result = _string_tailmatch(self, subobj, start, end, +1);
3239 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003240 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003241 else
Georg Brandl24250812006-06-09 18:45:48 +00003242 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003243}
3244
3245
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003246PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003247"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003248\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003249Encodes S using the codec registered for encoding. encoding defaults\n\
3250to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003251handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003252a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3253'xmlcharrefreplace' as well as any other name registered with\n\
3254codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003255
3256static PyObject *
3257string_encode(PyStringObject *self, PyObject *args)
3258{
3259 char *encoding = NULL;
3260 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003261 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003262
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003263 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3264 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003265 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003266 if (v == NULL)
3267 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003268 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3269 PyErr_Format(PyExc_TypeError,
3270 "encoder did not return a string/unicode object "
3271 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003272 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003273 Py_DECREF(v);
3274 return NULL;
3275 }
3276 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003277
3278 onError:
3279 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003280}
3281
3282
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003283PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003284"S.decode([encoding[,errors]]) -> object\n\
3285\n\
3286Decodes S using the codec registered for encoding. encoding defaults\n\
3287to the default encoding. errors may be given to set a different error\n\
3288handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003289a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3290as well as any other name registerd with codecs.register_error that is\n\
3291able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003292
3293static PyObject *
3294string_decode(PyStringObject *self, PyObject *args)
3295{
3296 char *encoding = NULL;
3297 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003298 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003299
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003300 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3301 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003302 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003303 if (v == NULL)
3304 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003305 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3306 PyErr_Format(PyExc_TypeError,
3307 "decoder did not return a string/unicode object "
3308 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003309 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003310 Py_DECREF(v);
3311 return NULL;
3312 }
3313 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003314
3315 onError:
3316 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003317}
3318
3319
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003320PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003321"S.expandtabs([tabsize]) -> string\n\
3322\n\
3323Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003324If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003325
3326static PyObject*
3327string_expandtabs(PyStringObject *self, PyObject *args)
3328{
3329 const char *e, *p;
3330 char *q;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003331 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003332 PyObject *u;
3333 int tabsize = 8;
3334
3335 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3336 return NULL;
3337
Thomas Wouters7e474022000-07-16 12:04:32 +00003338 /* First pass: determine size of output string */
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003339 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003340 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3341 for (p = PyString_AS_STRING(self); p < e; p++)
3342 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003343 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003344 j += tabsize - (j % tabsize);
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003345 if (old_j > j) {
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003346 PyErr_SetString(PyExc_OverflowError,
3347 "new string is too long");
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003348 return NULL;
3349 }
3350 old_j = j;
3351 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003352 }
3353 else {
3354 j++;
3355 if (*p == '\n' || *p == '\r') {
3356 i += j;
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003357 old_j = j = 0;
3358 if (i < 0) {
3359 PyErr_SetString(PyExc_OverflowError,
3360 "new string is too long");
3361 return NULL;
3362 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003363 }
3364 }
3365
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003366 if ((i + j) < 0) {
3367 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3368 return NULL;
3369 }
3370
Guido van Rossum4c08d552000-03-10 22:55:18 +00003371 /* Second pass: create output string and fill it */
3372 u = PyString_FromStringAndSize(NULL, i + j);
3373 if (!u)
3374 return NULL;
3375
3376 j = 0;
3377 q = PyString_AS_STRING(u);
3378
3379 for (p = PyString_AS_STRING(self); p < e; p++)
3380 if (*p == '\t') {
3381 if (tabsize > 0) {
3382 i = tabsize - (j % tabsize);
3383 j += i;
3384 while (i--)
3385 *q++ = ' ';
3386 }
3387 }
3388 else {
3389 j++;
3390 *q++ = *p;
3391 if (*p == '\n' || *p == '\r')
3392 j = 0;
3393 }
3394
3395 return u;
3396}
3397
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003398Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003399pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003400{
3401 PyObject *u;
3402
3403 if (left < 0)
3404 left = 0;
3405 if (right < 0)
3406 right = 0;
3407
Tim Peters8fa5dd02001-09-12 02:18:30 +00003408 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003409 Py_INCREF(self);
3410 return (PyObject *)self;
3411 }
3412
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003413 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414 left + PyString_GET_SIZE(self) + right);
3415 if (u) {
3416 if (left)
3417 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003418 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003419 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003420 PyString_GET_SIZE(self));
3421 if (right)
3422 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3423 fill, right);
3424 }
3425
3426 return u;
3427}
3428
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003429PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003430"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003431"\n"
3432"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003433"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003434
3435static PyObject *
3436string_ljust(PyStringObject *self, PyObject *args)
3437{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003438 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003439 char fillchar = ' ';
3440
Thomas Wouters4abb3662006-04-19 14:50:15 +00003441 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003442 return NULL;
3443
Tim Peters8fa5dd02001-09-12 02:18:30 +00003444 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445 Py_INCREF(self);
3446 return (PyObject*) self;
3447 }
3448
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003449 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003450}
3451
3452
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003453PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003454"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003455"\n"
3456"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003457"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003458
3459static PyObject *
3460string_rjust(PyStringObject *self, PyObject *args)
3461{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003462 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003463 char fillchar = ' ';
3464
Thomas Wouters4abb3662006-04-19 14:50:15 +00003465 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003466 return NULL;
3467
Tim Peters8fa5dd02001-09-12 02:18:30 +00003468 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003469 Py_INCREF(self);
3470 return (PyObject*) self;
3471 }
3472
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003473 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003474}
3475
3476
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003477PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003478"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003479"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003480"Return S centered in a string of length width. Padding is\n"
3481"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003482
3483static PyObject *
3484string_center(PyStringObject *self, PyObject *args)
3485{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003486 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003487 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003488 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003489
Thomas Wouters4abb3662006-04-19 14:50:15 +00003490 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003491 return NULL;
3492
Tim Peters8fa5dd02001-09-12 02:18:30 +00003493 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003494 Py_INCREF(self);
3495 return (PyObject*) self;
3496 }
3497
3498 marg = width - PyString_GET_SIZE(self);
3499 left = marg / 2 + (marg & width & 1);
3500
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003501 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003502}
3503
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003504PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003505"S.zfill(width) -> string\n"
3506"\n"
3507"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003508"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003509
3510static PyObject *
3511string_zfill(PyStringObject *self, PyObject *args)
3512{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003513 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003514 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003515 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003516 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003517
Thomas Wouters4abb3662006-04-19 14:50:15 +00003518 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003519 return NULL;
3520
3521 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003522 if (PyString_CheckExact(self)) {
3523 Py_INCREF(self);
3524 return (PyObject*) self;
3525 }
3526 else
3527 return PyString_FromStringAndSize(
3528 PyString_AS_STRING(self),
3529 PyString_GET_SIZE(self)
3530 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003531 }
3532
3533 fill = width - PyString_GET_SIZE(self);
3534
3535 s = pad(self, fill, 0, '0');
3536
3537 if (s == NULL)
3538 return NULL;
3539
3540 p = PyString_AS_STRING(s);
3541 if (p[fill] == '+' || p[fill] == '-') {
3542 /* move sign to beginning of string */
3543 p[0] = p[fill];
3544 p[fill] = '0';
3545 }
3546
3547 return (PyObject*) s;
3548}
3549
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003550PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003551"S.isspace() -> bool\n\
3552\n\
3553Return True if all characters in S are whitespace\n\
3554and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003555
3556static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003557string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003558{
Fred Drakeba096332000-07-09 07:04:36 +00003559 register const unsigned char *p
3560 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003561 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003562
Guido van Rossum4c08d552000-03-10 22:55:18 +00003563 /* Shortcut for single character strings */
3564 if (PyString_GET_SIZE(self) == 1 &&
3565 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003566 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003567
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003568 /* Special case for empty strings */
3569 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003570 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003571
Guido van Rossum4c08d552000-03-10 22:55:18 +00003572 e = p + PyString_GET_SIZE(self);
3573 for (; p < e; p++) {
3574 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003575 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003576 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003577 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003578}
3579
3580
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003581PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003582"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003583\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003584Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003585and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003586
3587static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003588string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003589{
Fred Drakeba096332000-07-09 07:04:36 +00003590 register const unsigned char *p
3591 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003592 register const unsigned char *e;
3593
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003594 /* Shortcut for single character strings */
3595 if (PyString_GET_SIZE(self) == 1 &&
3596 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003597 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003598
3599 /* Special case for empty strings */
3600 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003601 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003602
3603 e = p + PyString_GET_SIZE(self);
3604 for (; p < e; p++) {
3605 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003606 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003607 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003608 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003609}
3610
3611
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003612PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003613"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003614\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003615Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003616and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003617
3618static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003619string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003620{
Fred Drakeba096332000-07-09 07:04:36 +00003621 register const unsigned char *p
3622 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003623 register const unsigned char *e;
3624
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003625 /* Shortcut for single character strings */
3626 if (PyString_GET_SIZE(self) == 1 &&
3627 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003628 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003629
3630 /* Special case for empty strings */
3631 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003632 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003633
3634 e = p + PyString_GET_SIZE(self);
3635 for (; p < e; p++) {
3636 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003638 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003639 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003640}
3641
3642
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003643PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003645\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003646Return True if all characters in S are digits\n\
3647and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003648
3649static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003650string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651{
Fred Drakeba096332000-07-09 07:04:36 +00003652 register const unsigned char *p
3653 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003654 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656 /* Shortcut for single character strings */
3657 if (PyString_GET_SIZE(self) == 1 &&
3658 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003659 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003661 /* Special case for empty strings */
3662 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003663 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003664
Guido van Rossum4c08d552000-03-10 22:55:18 +00003665 e = p + PyString_GET_SIZE(self);
3666 for (; p < e; p++) {
3667 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003668 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671}
3672
3673
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003674PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003675"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003677Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003678at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003679
3680static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003681string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682{
Fred Drakeba096332000-07-09 07:04:36 +00003683 register const unsigned char *p
3684 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003685 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686 int cased;
3687
Guido van Rossum4c08d552000-03-10 22:55:18 +00003688 /* Shortcut for single character strings */
3689 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003690 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003692 /* Special case for empty strings */
3693 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003694 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003695
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696 e = p + PyString_GET_SIZE(self);
3697 cased = 0;
3698 for (; p < e; p++) {
3699 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003700 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003701 else if (!cased && islower(*p))
3702 cased = 1;
3703 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003704 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705}
3706
3707
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003708PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003709"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003711Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003712at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003713
3714static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003715string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716{
Fred Drakeba096332000-07-09 07:04:36 +00003717 register const unsigned char *p
3718 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003719 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720 int cased;
3721
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722 /* Shortcut for single character strings */
3723 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003724 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003726 /* Special case for empty strings */
3727 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003728 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003729
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730 e = p + PyString_GET_SIZE(self);
3731 cased = 0;
3732 for (; p < e; p++) {
3733 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003734 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735 else if (!cased && isupper(*p))
3736 cased = 1;
3737 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003738 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739}
3740
3741
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003742PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003743"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003744\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003745Return True if S is a titlecased string and there is at least one\n\
3746character in S, i.e. uppercase characters may only follow uncased\n\
3747characters and lowercase characters only cased ones. Return False\n\
3748otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003749
3750static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003751string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003752{
Fred Drakeba096332000-07-09 07:04:36 +00003753 register const unsigned char *p
3754 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003755 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756 int cased, previous_is_cased;
3757
Guido van Rossum4c08d552000-03-10 22:55:18 +00003758 /* Shortcut for single character strings */
3759 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003760 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003762 /* Special case for empty strings */
3763 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003764 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003765
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766 e = p + PyString_GET_SIZE(self);
3767 cased = 0;
3768 previous_is_cased = 0;
3769 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003770 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003771
3772 if (isupper(ch)) {
3773 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003774 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003775 previous_is_cased = 1;
3776 cased = 1;
3777 }
3778 else if (islower(ch)) {
3779 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003780 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003781 previous_is_cased = 1;
3782 cased = 1;
3783 }
3784 else
3785 previous_is_cased = 0;
3786 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003787 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003788}
3789
3790
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003791PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003792"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003793\n\
3794Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003795Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003796is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797
Guido van Rossum4c08d552000-03-10 22:55:18 +00003798static PyObject*
3799string_splitlines(PyStringObject *self, PyObject *args)
3800{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003801 register Py_ssize_t i;
3802 register Py_ssize_t j;
3803 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003804 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003805 PyObject *list;
3806 PyObject *str;
3807 char *data;
3808
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003809 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810 return NULL;
3811
3812 data = PyString_AS_STRING(self);
3813 len = PyString_GET_SIZE(self);
3814
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003815 /* This does not use the preallocated list because splitlines is
3816 usually run with hundreds of newlines. The overhead of
3817 switching between PyList_SET_ITEM and append causes about a
3818 2-3% slowdown for that common case. A smarter implementation
3819 could move the if check out, so the SET_ITEMs are done first
3820 and the appends only done when the prealloc buffer is full.
3821 That's too much work for little gain.*/
3822
Guido van Rossum4c08d552000-03-10 22:55:18 +00003823 list = PyList_New(0);
3824 if (!list)
3825 goto onError;
3826
3827 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003828 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003829
Guido van Rossum4c08d552000-03-10 22:55:18 +00003830 /* Find a line and append it */
3831 while (i < len && data[i] != '\n' && data[i] != '\r')
3832 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003833
3834 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003835 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836 if (i < len) {
3837 if (data[i] == '\r' && i + 1 < len &&
3838 data[i+1] == '\n')
3839 i += 2;
3840 else
3841 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003842 if (keepends)
3843 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003844 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003845 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003846 j = i;
3847 }
3848 if (j < len) {
3849 SPLIT_APPEND(data, j, len);
3850 }
3851
3852 return list;
3853
3854 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003855 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003856 return NULL;
3857}
3858
3859#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003860#undef SPLIT_ADD
3861#undef MAX_PREALLOC
3862#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003863
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003864static PyObject *
3865string_getnewargs(PyStringObject *v)
3866{
Martin v. Löwis68192102007-07-21 06:55:02 +00003867 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003868}
3869
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003870
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003871static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003872string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003873 /* Counterparts of the obsolete stropmodule functions; except
3874 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003875 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3876 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003877 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003878 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3879 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003880 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3881 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3882 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3883 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3884 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3885 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3886 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003887 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3888 capitalize__doc__},
3889 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3890 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3891 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003892 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003893 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3894 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3895 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3896 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3897 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3898 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3899 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003900 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3901 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003902 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3903 startswith__doc__},
3904 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3905 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3906 swapcase__doc__},
3907 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3908 translate__doc__},
3909 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3910 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3911 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3912 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3913 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3914 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3915 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3916 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3917 expandtabs__doc__},
3918 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3919 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003920 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003921 {NULL, NULL} /* sentinel */
3922};
3923
Jeremy Hylton938ace62002-07-17 16:30:39 +00003924static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003925str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3926
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003927static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003928string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003929{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003930 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003931 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003932
Guido van Rossumae960af2001-08-30 03:11:59 +00003933 if (type != &PyString_Type)
3934 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003935 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3936 return NULL;
3937 if (x == NULL)
3938 return PyString_FromString("");
3939 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003940}
3941
Guido van Rossumae960af2001-08-30 03:11:59 +00003942static PyObject *
3943str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3944{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003945 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003946 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003947
3948 assert(PyType_IsSubtype(type, &PyString_Type));
3949 tmp = string_new(&PyString_Type, args, kwds);
3950 if (tmp == NULL)
3951 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003952 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003953 n = PyString_GET_SIZE(tmp);
3954 pnew = type->tp_alloc(type, n);
3955 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003956 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003957 ((PyStringObject *)pnew)->ob_shash =
3958 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003959 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003960 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003961 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003962 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003963}
3964
Guido van Rossumcacfc072002-05-24 19:01:59 +00003965static PyObject *
3966basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3967{
3968 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003969 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003970 return NULL;
3971}
3972
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003973static PyObject *
3974string_mod(PyObject *v, PyObject *w)
3975{
3976 if (!PyString_Check(v)) {
3977 Py_INCREF(Py_NotImplemented);
3978 return Py_NotImplemented;
3979 }
3980 return PyString_Format(v, w);
3981}
3982
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003983PyDoc_STRVAR(basestring_doc,
3984"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003985
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003986static PyNumberMethods string_as_number = {
3987 0, /*nb_add*/
3988 0, /*nb_subtract*/
3989 0, /*nb_multiply*/
3990 0, /*nb_divide*/
3991 string_mod, /*nb_remainder*/
3992};
3993
3994
Guido van Rossumcacfc072002-05-24 19:01:59 +00003995PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00003996 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003997 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003998 0,
3999 0,
4000 0, /* tp_dealloc */
4001 0, /* tp_print */
4002 0, /* tp_getattr */
4003 0, /* tp_setattr */
4004 0, /* tp_compare */
4005 0, /* tp_repr */
4006 0, /* tp_as_number */
4007 0, /* tp_as_sequence */
4008 0, /* tp_as_mapping */
4009 0, /* tp_hash */
4010 0, /* tp_call */
4011 0, /* tp_str */
4012 0, /* tp_getattro */
4013 0, /* tp_setattro */
4014 0, /* tp_as_buffer */
4015 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4016 basestring_doc, /* tp_doc */
4017 0, /* tp_traverse */
4018 0, /* tp_clear */
4019 0, /* tp_richcompare */
4020 0, /* tp_weaklistoffset */
4021 0, /* tp_iter */
4022 0, /* tp_iternext */
4023 0, /* tp_methods */
4024 0, /* tp_members */
4025 0, /* tp_getset */
4026 &PyBaseObject_Type, /* tp_base */
4027 0, /* tp_dict */
4028 0, /* tp_descr_get */
4029 0, /* tp_descr_set */
4030 0, /* tp_dictoffset */
4031 0, /* tp_init */
4032 0, /* tp_alloc */
4033 basestring_new, /* tp_new */
4034 0, /* tp_free */
4035};
4036
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004037PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004038"str(object) -> string\n\
4039\n\
4040Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004041If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004042
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004043PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004044 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004045 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004046 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004047 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004048 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004049 (printfunc)string_print, /* tp_print */
4050 0, /* tp_getattr */
4051 0, /* tp_setattr */
4052 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004053 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004054 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004055 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004056 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004057 (hashfunc)string_hash, /* tp_hash */
4058 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004059 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004060 PyObject_GenericGetAttr, /* tp_getattro */
4061 0, /* tp_setattro */
4062 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004063 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neal Norwitzee3a1b52007-02-25 19:44:48 +00004064 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004065 string_doc, /* tp_doc */
4066 0, /* tp_traverse */
4067 0, /* tp_clear */
4068 (richcmpfunc)string_richcompare, /* tp_richcompare */
4069 0, /* tp_weaklistoffset */
4070 0, /* tp_iter */
4071 0, /* tp_iternext */
4072 string_methods, /* tp_methods */
4073 0, /* tp_members */
4074 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004075 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004076 0, /* tp_dict */
4077 0, /* tp_descr_get */
4078 0, /* tp_descr_set */
4079 0, /* tp_dictoffset */
4080 0, /* tp_init */
4081 0, /* tp_alloc */
4082 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004083 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004084};
4085
4086void
Fred Drakeba096332000-07-09 07:04:36 +00004087PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004088{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004089 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004090 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004091 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004092 if (w == NULL || !PyString_Check(*pv)) {
4093 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004094 *pv = NULL;
4095 return;
4096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004097 v = string_concat((PyStringObject *) *pv, w);
4098 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004099 *pv = v;
4100}
4101
Guido van Rossum013142a1994-08-30 08:19:36 +00004102void
Fred Drakeba096332000-07-09 07:04:36 +00004103PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004104{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004105 PyString_Concat(pv, w);
4106 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004107}
4108
4109
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004110/* The following function breaks the notion that strings are immutable:
4111 it changes the size of a string. We get away with this only if there
4112 is only one module referencing the object. You can also think of it
4113 as creating a new string object and destroying the old one, only
4114 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004115 already be known to some other part of the code...
4116 Note that if there's not enough memory to resize the string, the original
4117 string object at *pv is deallocated, *pv is set to NULL, an "out of
4118 memory" exception is set, and -1 is returned. Else (on success) 0 is
4119 returned, and the value in *pv may or may not be the same as on input.
4120 As always, an extra byte is allocated for a trailing \0 byte (newsize
4121 does *not* include that), and a trailing \0 byte is stored.
4122*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004123
4124int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004125_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004126{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004127 register PyObject *v;
4128 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004129 v = *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004130 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004131 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004132 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004133 Py_DECREF(v);
4134 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004135 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004136 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004137 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004138 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004139 _Py_ForgetReference(v);
4140 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004141 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004142 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004143 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004144 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004145 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004146 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004147 _Py_NewReference(*pv);
4148 sv = (PyStringObject *) *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004149 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004150 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004151 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004152 return 0;
4153}
Guido van Rossume5372401993-03-16 12:15:04 +00004154
4155/* Helpers for formatstring */
4156
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004157Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004158getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004159{
Thomas Wouters977485d2006-02-16 15:59:12 +00004160 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004161 if (argidx < arglen) {
4162 (*p_argidx)++;
4163 if (arglen < 0)
4164 return args;
4165 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004166 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004167 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004168 PyErr_SetString(PyExc_TypeError,
4169 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004170 return NULL;
4171}
4172
/* Format flags: one bit per flag character that may follow '%'.
 *
 *      flag        char    bit
 *      F_LJUST     '-'     0
 *      F_SIGN      '+'     1
 *      F_BLANK     ' '     2
 *      F_ALT       '#'     3
 *      F_ZERO      '0'     4
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4185
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004186Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004187formatfloat(char *buf, size_t buflen, int flags,
4188 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004189{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004190 /* fmt = '%#.' + `prec` + `type`
4191 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004192 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004193 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004194 x = PyFloat_AsDouble(v);
4195 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004196 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis68192102007-07-21 06:55:02 +00004197 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004198 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004199 }
Guido van Rossume5372401993-03-16 12:15:04 +00004200 if (prec < 0)
4201 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004202 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4203 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004204 /* Worst case length calc to ensure no buffer overrun:
4205
4206 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004207 fmt = %#.<prec>g
4208 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004209 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004210 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004211
4212 'f' formats:
4213 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4214 len = 1 + 50 + 1 + prec = 52 + prec
4215
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004216 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004217 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004218
4219 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004220 if (((type == 'g' || type == 'G') &&
4221 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004222 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004223 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004224 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004225 return -1;
4226 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004227 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4228 (flags&F_ALT) ? "#" : "",
4229 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004230 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004231 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004232}
4233
Tim Peters38fd5b62000-09-21 05:43:11 +00004234/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4235 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4236 * Python's regular ints.
4237 * Return value: a new PyString*, or NULL if error.
4238 * . *pbuf is set to point into it,
4239 * *plen set to the # of chars following that.
4240 * Caller must decref it when done using pbuf.
4241 * The string starting at *pbuf is of the form
4242 * "-"? ("0x" | "0X")? digit+
4243 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004244 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004245 * There will be at least prec digits, zero-filled on the left if
4246 * necessary to get that many.
4247 * val object to be converted
4248 * flags bitmask of format flags; only F_ALT is looked at
4249 * prec minimum number of digits; 0-fill on left if needed
4250 * type a character in [duoxX]; u acts the same as d
4251 *
4252 * CAUTION: o, x and X conversions on regular ints can never
4253 * produce a '-' sign, but can for Python's unbounded ints.
4254 */
4255PyObject*
4256_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4257 char **pbuf, int *plen)
4258{
4259 PyObject *result = NULL;
4260 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004261 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004262 int sign; /* 1 if '-', else 0 */
4263 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004264 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004265 int numdigits; /* len == numnondigits + numdigits */
4266 int numnondigits = 0;
4267
4268 switch (type) {
4269 case 'd':
4270 case 'u':
Martin v. Löwis68192102007-07-21 06:55:02 +00004271 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004272 break;
4273 case 'o':
Martin v. Löwis68192102007-07-21 06:55:02 +00004274 result = Py_Type(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004275 break;
4276 case 'x':
4277 case 'X':
4278 numnondigits = 2;
Martin v. Löwis68192102007-07-21 06:55:02 +00004279 result = Py_Type(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004280 break;
4281 default:
4282 assert(!"'type' not in [duoxX]");
4283 }
4284 if (!result)
4285 return NULL;
4286
Neal Norwitz56423e52006-08-13 18:11:08 +00004287 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004288 if (!buf) {
4289 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004290 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004291 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004292
Tim Peters38fd5b62000-09-21 05:43:11 +00004293 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis68192102007-07-21 06:55:02 +00004294 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004295 PyErr_BadInternalCall();
4296 return NULL;
4297 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004298 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004299 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004300 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4301 return NULL;
4302 }
4303 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004304 if (buf[len-1] == 'L') {
4305 --len;
4306 buf[len] = '\0';
4307 }
4308 sign = buf[0] == '-';
4309 numnondigits += sign;
4310 numdigits = len - numnondigits;
4311 assert(numdigits > 0);
4312
Tim Petersfff53252001-04-12 18:38:48 +00004313 /* Get rid of base marker unless F_ALT */
4314 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004315 /* Need to skip 0x, 0X or 0. */
4316 int skipped = 0;
4317 switch (type) {
4318 case 'o':
4319 assert(buf[sign] == '0');
4320 /* If 0 is only digit, leave it alone. */
4321 if (numdigits > 1) {
4322 skipped = 1;
4323 --numdigits;
4324 }
4325 break;
4326 case 'x':
4327 case 'X':
4328 assert(buf[sign] == '0');
4329 assert(buf[sign + 1] == 'x');
4330 skipped = 2;
4331 numnondigits -= 2;
4332 break;
4333 }
4334 if (skipped) {
4335 buf += skipped;
4336 len -= skipped;
4337 if (sign)
4338 buf[0] = '-';
4339 }
4340 assert(len == numnondigits + numdigits);
4341 assert(numdigits > 0);
4342 }
4343
4344 /* Fill with leading zeroes to meet minimum width. */
4345 if (prec > numdigits) {
4346 PyObject *r1 = PyString_FromStringAndSize(NULL,
4347 numnondigits + prec);
4348 char *b1;
4349 if (!r1) {
4350 Py_DECREF(result);
4351 return NULL;
4352 }
4353 b1 = PyString_AS_STRING(r1);
4354 for (i = 0; i < numnondigits; ++i)
4355 *b1++ = *buf++;
4356 for (i = 0; i < prec - numdigits; i++)
4357 *b1++ = '0';
4358 for (i = 0; i < numdigits; i++)
4359 *b1++ = *buf++;
4360 *b1 = '\0';
4361 Py_DECREF(result);
4362 result = r1;
4363 buf = PyString_AS_STRING(result);
4364 len = numnondigits + prec;
4365 }
4366
4367 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004368 if (type == 'X') {
4369 /* Need to convert all lower case letters to upper case.
4370 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004371 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004372 if (buf[i] >= 'a' && buf[i] <= 'x')
4373 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004374 }
4375 *pbuf = buf;
4376 *plen = len;
4377 return result;
4378}
4379
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004380Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004381formatint(char *buf, size_t buflen, int flags,
4382 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004383{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004384 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004385 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4386 + 1 + 1 = 24 */
4387 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004388 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004389 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004390
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004391 x = PyInt_AsLong(v);
4392 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004393 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00004394 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004395 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004396 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004397 if (x < 0 && type == 'u') {
4398 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004399 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004400 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4401 sign = "-";
4402 else
4403 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004404 if (prec < 0)
4405 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004406
4407 if ((flags & F_ALT) &&
4408 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004409 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004410 * of issues that cause pain:
4411 * - when 0 is being converted, the C standard leaves off
4412 * the '0x' or '0X', which is inconsistent with other
4413 * %#x/%#X conversions and inconsistent with Python's
4414 * hex() function
4415 * - there are platforms that violate the standard and
4416 * convert 0 with the '0x' or '0X'
4417 * (Metrowerks, Compaq Tru64)
4418 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004419 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004420 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004421 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004422 * We can achieve the desired consistency by inserting our
4423 * own '0x' or '0X' prefix, and substituting %x/%X in place
4424 * of %#x/%#X.
4425 *
4426 * Note that this is the same approach as used in
4427 * formatint() in unicodeobject.c
4428 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004429 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4430 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004431 }
4432 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004433 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4434 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004435 prec, type);
4436 }
4437
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004438 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4439 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004440 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004441 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004442 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004443 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004444 return -1;
4445 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004446 if (sign[0])
4447 PyOS_snprintf(buf, buflen, fmt, -x);
4448 else
4449 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004450 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004451}
4452
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004453Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004454formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004455{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004456 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004457 if (PyString_Check(v)) {
4458 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004459 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004460 }
4461 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004462 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004463 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004464 }
4465 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004466 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004467}
4468
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004478
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004479PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004480PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004481{
4482 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004483 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004484 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004485 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004486 PyObject *result, *orig_args;
4487#ifdef Py_USING_UNICODE
4488 PyObject *v, *w;
4489#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004490 PyObject *dict = NULL;
4491 if (format == NULL || !PyString_Check(format) || args == NULL) {
4492 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004493 return NULL;
4494 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004495 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004496 fmt = PyString_AS_STRING(format);
4497 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004498 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004499 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004500 if (result == NULL)
4501 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004502 res = PyString_AsString(result);
4503 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004504 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004505 argidx = 0;
4506 }
4507 else {
4508 arglen = -1;
4509 argidx = -2;
4510 }
Martin v. Löwis68192102007-07-21 06:55:02 +00004511 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004512 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004513 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004514 while (--fmtcnt >= 0) {
4515 if (*fmt != '%') {
4516 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004517 rescnt = fmtcnt + 100;
4518 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004519 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004520 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004521 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004522 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004523 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004524 }
4525 *res++ = *fmt++;
4526 }
4527 else {
4528 /* Got a format specifier */
4529 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004530 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004531 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004532 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004533 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004534 PyObject *v = NULL;
4535 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004536 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004537 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004538 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004539 char formatbuf[FORMATBUFLEN];
4540 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004541#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004542 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004543 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004544#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004545
Guido van Rossumda9c2711996-12-05 21:58:58 +00004546 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004547 if (*fmt == '(') {
4548 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004549 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004550 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004551 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004552
4553 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004554 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004555 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004556 goto error;
4557 }
4558 ++fmt;
4559 --fmtcnt;
4560 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004561 /* Skip over balanced parentheses */
4562 while (pcount > 0 && --fmtcnt >= 0) {
4563 if (*fmt == ')')
4564 --pcount;
4565 else if (*fmt == '(')
4566 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004567 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004568 }
4569 keylen = fmt - keystart - 1;
4570 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004571 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004572 "incomplete format key");
4573 goto error;
4574 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004575 key = PyString_FromStringAndSize(keystart,
4576 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004577 if (key == NULL)
4578 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004579 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004580 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004581 args_owned = 0;
4582 }
4583 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004584 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004585 if (args == NULL) {
4586 goto error;
4587 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004588 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004589 arglen = -1;
4590 argidx = -2;
4591 }
Guido van Rossume5372401993-03-16 12:15:04 +00004592 while (--fmtcnt >= 0) {
4593 switch (c = *fmt++) {
4594 case '-': flags |= F_LJUST; continue;
4595 case '+': flags |= F_SIGN; continue;
4596 case ' ': flags |= F_BLANK; continue;
4597 case '#': flags |= F_ALT; continue;
4598 case '0': flags |= F_ZERO; continue;
4599 }
4600 break;
4601 }
4602 if (c == '*') {
4603 v = getnextarg(args, arglen, &argidx);
4604 if (v == NULL)
4605 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004606 if (!PyInt_Check(v)) {
4607 PyErr_SetString(PyExc_TypeError,
4608 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004609 goto error;
4610 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004611 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004612 if (width < 0) {
4613 flags |= F_LJUST;
4614 width = -width;
4615 }
Guido van Rossume5372401993-03-16 12:15:04 +00004616 if (--fmtcnt >= 0)
4617 c = *fmt++;
4618 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004619 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004620 width = c - '0';
4621 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004622 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004623 if (!isdigit(c))
4624 break;
4625 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004626 PyErr_SetString(
4627 PyExc_ValueError,
4628 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004629 goto error;
4630 }
4631 width = width*10 + (c - '0');
4632 }
4633 }
4634 if (c == '.') {
4635 prec = 0;
4636 if (--fmtcnt >= 0)
4637 c = *fmt++;
4638 if (c == '*') {
4639 v = getnextarg(args, arglen, &argidx);
4640 if (v == NULL)
4641 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004642 if (!PyInt_Check(v)) {
4643 PyErr_SetString(
4644 PyExc_TypeError,
4645 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004646 goto error;
4647 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004648 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004649 if (prec < 0)
4650 prec = 0;
4651 if (--fmtcnt >= 0)
4652 c = *fmt++;
4653 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004654 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004655 prec = c - '0';
4656 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004657 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004658 if (!isdigit(c))
4659 break;
4660 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004661 PyErr_SetString(
4662 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004663 "prec too big");
4664 goto error;
4665 }
4666 prec = prec*10 + (c - '0');
4667 }
4668 }
4669 } /* prec */
4670 if (fmtcnt >= 0) {
4671 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004672 if (--fmtcnt >= 0)
4673 c = *fmt++;
4674 }
4675 }
4676 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004677 PyErr_SetString(PyExc_ValueError,
4678 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004679 goto error;
4680 }
4681 if (c != '%') {
4682 v = getnextarg(args, arglen, &argidx);
4683 if (v == NULL)
4684 goto error;
4685 }
4686 sign = 0;
4687 fill = ' ';
4688 switch (c) {
4689 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004690 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004691 len = 1;
4692 break;
4693 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004694#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004695 if (PyUnicode_Check(v)) {
4696 fmt = fmt_start;
4697 argidx = argidx_start;
4698 goto unicode;
4699 }
Georg Brandld45014b2005-10-01 17:06:00 +00004700#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004701 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004702#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004703 if (temp != NULL && PyUnicode_Check(temp)) {
4704 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004705 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004706 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004707 goto unicode;
4708 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004709#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004710 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004711 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004712 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004713 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004714 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004715 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004716 if (!PyString_Check(temp)) {
4717 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004718 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004719 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004720 goto error;
4721 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004722 pbuf = PyString_AS_STRING(temp);
4723 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004724 if (prec >= 0 && len > prec)
4725 len = prec;
4726 break;
4727 case 'i':
4728 case 'd':
4729 case 'u':
4730 case 'o':
4731 case 'x':
4732 case 'X':
4733 if (c == 'i')
4734 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004735 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004736 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004737 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004738 prec, c, &pbuf, &ilen);
4739 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004740 if (!temp)
4741 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004742 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004743 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004744 else {
4745 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004746 len = formatint(pbuf,
4747 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004748 flags, prec, c, v);
4749 if (len < 0)
4750 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004751 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004752 }
4753 if (flags & F_ZERO)
4754 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004755 break;
4756 case 'e':
4757 case 'E':
4758 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004759 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004760 case 'g':
4761 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004762 if (c == 'F')
4763 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004764 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004765 len = formatfloat(pbuf, sizeof(formatbuf),
4766 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004767 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004768 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004769 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004770 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004771 fill = '0';
4772 break;
4773 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004774#ifdef Py_USING_UNICODE
4775 if (PyUnicode_Check(v)) {
4776 fmt = fmt_start;
4777 argidx = argidx_start;
4778 goto unicode;
4779 }
4780#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004781 pbuf = formatbuf;
4782 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004783 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004784 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004785 break;
4786 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004787 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004788 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004789 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004790 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004791 (Py_ssize_t)(fmt - 1 -
4792 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004793 goto error;
4794 }
4795 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004796 if (*pbuf == '-' || *pbuf == '+') {
4797 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004798 len--;
4799 }
4800 else if (flags & F_SIGN)
4801 sign = '+';
4802 else if (flags & F_BLANK)
4803 sign = ' ';
4804 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004805 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004806 }
4807 if (width < len)
4808 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004809 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004810 reslen -= rescnt;
4811 rescnt = width + fmtcnt + 100;
4812 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004813 if (reslen < 0) {
4814 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004815 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004816 return PyErr_NoMemory();
4817 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004818 if (_PyString_Resize(&result, reslen) < 0) {
4819 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004820 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004821 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004822 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004823 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004824 }
4825 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004826 if (fill != ' ')
4827 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004828 rescnt--;
4829 if (width > len)
4830 width--;
4831 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004832 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4833 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004834 assert(pbuf[1] == c);
4835 if (fill != ' ') {
4836 *res++ = *pbuf++;
4837 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004838 }
Tim Petersfff53252001-04-12 18:38:48 +00004839 rescnt -= 2;
4840 width -= 2;
4841 if (width < 0)
4842 width = 0;
4843 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004844 }
4845 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004846 do {
4847 --rescnt;
4848 *res++ = fill;
4849 } while (--width > len);
4850 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004851 if (fill == ' ') {
4852 if (sign)
4853 *res++ = sign;
4854 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004855 (c == 'x' || c == 'X')) {
4856 assert(pbuf[0] == '0');
4857 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004858 *res++ = *pbuf++;
4859 *res++ = *pbuf++;
4860 }
4861 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004862 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004863 res += len;
4864 rescnt -= len;
4865 while (--width >= len) {
4866 --rescnt;
4867 *res++ = ' ';
4868 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004869 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004870 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004871 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004872 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004873 goto error;
4874 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004875 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004876 } /* '%' */
4877 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004878 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004879 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004880 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004881 goto error;
4882 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004883 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004884 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004885 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004886 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004887 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004888
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004889#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004890 unicode:
4891 if (args_owned) {
4892 Py_DECREF(args);
4893 args_owned = 0;
4894 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004895 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004896 if (PyTuple_Check(orig_args) && argidx > 0) {
4897 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004898 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004899 v = PyTuple_New(n);
4900 if (v == NULL)
4901 goto error;
4902 while (--n >= 0) {
4903 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4904 Py_INCREF(w);
4905 PyTuple_SET_ITEM(v, n, w);
4906 }
4907 args = v;
4908 } else {
4909 Py_INCREF(orig_args);
4910 args = orig_args;
4911 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004912 args_owned = 1;
4913 /* Take what we have of the result and let the Unicode formatting
4914 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004915 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004916 if (_PyString_Resize(&result, rescnt))
4917 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004918 fmtcnt = PyString_GET_SIZE(format) - \
4919 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004920 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4921 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004922 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004923 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004924 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004925 if (v == NULL)
4926 goto error;
4927 /* Paste what we have (result) to what the Unicode formatting
4928 function returned (v) and return the result (or error) */
4929 w = PyUnicode_Concat(result, v);
4930 Py_DECREF(result);
4931 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004932 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004933 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004934#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004935
Guido van Rossume5372401993-03-16 12:15:04 +00004936 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004937 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004938 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004939 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004940 }
Guido van Rossume5372401993-03-16 12:15:04 +00004941 return NULL;
4942}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004943
Guido van Rossum2a61e741997-01-18 07:55:05 +00004944void
Fred Drakeba096332000-07-09 07:04:36 +00004945PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004946{
4947 register PyStringObject *s = (PyStringObject *)(*p);
4948 PyObject *t;
4949 if (s == NULL || !PyString_Check(s))
4950 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004951 /* If it's a string subclass, we don't really know what putting
4952 it in the interned dict might do. */
4953 if (!PyString_CheckExact(s))
4954 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004955 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004956 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004957 if (interned == NULL) {
4958 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004959 if (interned == NULL) {
4960 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004961 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004962 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004963 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004964 t = PyDict_GetItem(interned, (PyObject *)s);
4965 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004966 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004967 Py_DECREF(*p);
4968 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004969 return;
4970 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004971
Armin Rigo79f7ad22004-08-07 19:27:39 +00004972 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004973 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004974 return;
4975 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004976 /* The two references in interned are not counted by refcnt.
4977 The string deallocator will take care of this */
Martin v. Löwis68192102007-07-21 06:55:02 +00004978 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004979 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004980}
4981
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004982void
4983PyString_InternImmortal(PyObject **p)
4984{
4985 PyString_InternInPlace(p);
4986 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4987 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4988 Py_INCREF(*p);
4989 }
4990}
4991
Guido van Rossum2a61e741997-01-18 07:55:05 +00004992
4993PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004994PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004995{
4996 PyObject *s = PyString_FromString(cp);
4997 if (s == NULL)
4998 return NULL;
4999 PyString_InternInPlace(&s);
5000 return s;
5001}
5002
Guido van Rossum8cf04761997-08-02 02:57:45 +00005003void
Fred Drakeba096332000-07-09 07:04:36 +00005004PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005005{
5006 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005007 for (i = 0; i < UCHAR_MAX + 1; i++) {
5008 Py_XDECREF(characters[i]);
5009 characters[i] = NULL;
5010 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005011 Py_XDECREF(nullstring);
5012 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005013}
Barry Warsawa903ad982001-02-23 16:40:48 +00005014
Barry Warsawa903ad982001-02-23 16:40:48 +00005015void _Py_ReleaseInternedStrings(void)
5016{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005017 PyObject *keys;
5018 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005019 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005020 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005021
5022 if (interned == NULL || !PyDict_Check(interned))
5023 return;
5024 keys = PyDict_Keys(interned);
5025 if (keys == NULL || !PyList_Check(keys)) {
5026 PyErr_Clear();
5027 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005028 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005029
5030 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5031 detector, interned strings are not forcibly deallocated; rather, we
5032 give them their stolen references back, and then clear and DECREF
5033 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005034
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005035 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005036 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5037 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005038 for (i = 0; i < n; i++) {
5039 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5040 switch (s->ob_sstate) {
5041 case SSTATE_NOT_INTERNED:
5042 /* XXX Shouldn't happen */
5043 break;
5044 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005045 Py_Refcnt(s) += 1;
5046 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005047 break;
5048 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005049 Py_Refcnt(s) += 2;
5050 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005051 break;
5052 default:
5053 Py_FatalError("Inconsistent interned string state.");
5054 }
5055 s->ob_sstate = SSTATE_NOT_INTERNED;
5056 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005057 fprintf(stderr, "total size of all interned strings: "
5058 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5059 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005060 Py_DECREF(keys);
5061 PyDict_Clear(interned);
5062 Py_DECREF(interned);
5063 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005064}