blob: 22b50d5f76d0aaa8c33c947debdbdc475b098464 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000424 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000504 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
Martin v. Löwis68192102007-07-21 06:55:02 +0000524 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000536 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Martin v. Löwis68192102007-07-21 06:55:02 +0000720 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
Martin v. Löwis68192102007-07-21 06:55:02 +0000753 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000770/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
Fredrik Lundha50d2012006-05-26 17:04:58 +0000772#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000773
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000774#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000778
Fredrik Lundhb9479482006-05-26 17:22:38 +0000779#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Brett Cannon01531592007-09-17 03:28:34 +0000791 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000807 char *data = op->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +0000808 Py_ssize_t size = Py_Size(op);
Brett Cannon01531592007-09-17 03:28:34 +0000809 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000810 while (size > INT_MAX) {
811 /* Very long strings cannot be written atomically.
812 * But don't write exactly INT_MAX bytes at a time
813 * to avoid memory aligment issues.
814 */
815 const int chunk_size = INT_MAX & ~0x3FFF;
816 fwrite(data, 1, chunk_size, fp);
817 data += chunk_size;
818 size -= chunk_size;
819 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000820#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000821 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000822#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000823 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000824#endif
Brett Cannon01531592007-09-17 03:28:34 +0000825 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000826 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828
Thomas Wouters7e474022000-07-16 12:04:32 +0000829 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 quote = '\'';
Martin v. Löwis68192102007-07-21 06:55:02 +0000831 if (memchr(op->ob_sval, '\'', Py_Size(op)) &&
832 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833 quote = '"';
834
Brett Cannon01531592007-09-17 03:28:34 +0000835 str_len = Py_Size(op);
836 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000838 for (i = 0; i < str_len; i++) {
839 /* Since strings are immutable and the caller should have a
840 reference, accessing the interal buffer should not be an issue
841 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000842 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000843 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000845 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\r");
851 else if (c < ' ' || c >= 0x7f)
852 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000853 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000854 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000856 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000857 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000858 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000859}
860
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000861PyObject *
862PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000863{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis68192102007-07-21 06:55:02 +0000865 size_t newsize = 2 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000866 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +0000867 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000868 PyErr_SetString(PyExc_OverflowError,
869 "string is too large to make repr");
870 }
871 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000872 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000873 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 }
875 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000876 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 register char c;
878 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000879 int quote;
880
Thomas Wouters7e474022000-07-16 12:04:32 +0000881 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000882 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000883 if (smartquotes &&
Martin v. Löwis68192102007-07-21 06:55:02 +0000884 memchr(op->ob_sval, '\'', Py_Size(op)) &&
885 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000886 quote = '"';
887
Tim Peters9161c8b2001-12-03 01:55:38 +0000888 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000889 *p++ = quote;
Martin v. Löwis68192102007-07-21 06:55:02 +0000890 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000891 /* There's at least enough room for a hex escape
892 and a closing quote. */
893 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000894 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000895 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000897 else if (c == '\t')
898 *p++ = '\\', *p++ = 't';
899 else if (c == '\n')
900 *p++ = '\\', *p++ = 'n';
901 else if (c == '\r')
902 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000903 else if (c < ' ' || c >= 0x7f) {
904 /* For performance, we don't want to call
905 PyOS_snprintf here (extra layers of
906 function call). */
907 sprintf(p, "\\x%02x", c & 0xff);
908 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000909 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000910 else
911 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000913 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000914 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000916 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000917 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000918 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000919 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920}
921
Guido van Rossum189f1df2001-05-01 16:51:53 +0000922static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000923string_repr(PyObject *op)
924{
925 return PyString_Repr(op, 1);
926}
927
928static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000929string_str(PyObject *s)
930{
Tim Petersc9933152001-10-16 20:18:24 +0000931 assert(PyString_Check(s));
932 if (PyString_CheckExact(s)) {
933 Py_INCREF(s);
934 return s;
935 }
936 else {
937 /* Subtype -- return genuine string with the same value. */
938 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis68192102007-07-21 06:55:02 +0000939 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000940 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000941}
942
Martin v. Löwis18e16552006-02-15 17:27:45 +0000943static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000944string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945{
Martin v. Löwis68192102007-07-21 06:55:02 +0000946 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000947}
948
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000949static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000950string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000951{
Andrew Dalke598710c2006-05-25 18:18:39 +0000952 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000953 register PyStringObject *op;
954 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000955#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000956 if (PyUnicode_Check(bb))
957 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000958#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000959 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000960 "cannot concatenate 'str' and '%.200s' objects",
Martin v. Löwis68192102007-07-21 06:55:02 +0000961 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000962 return NULL;
963 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 /* Optimize cases with empty left or right operand */
Martin v. Löwis68192102007-07-21 06:55:02 +0000966 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000967 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis68192102007-07-21 06:55:02 +0000968 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000969 Py_INCREF(bb);
970 return bb;
971 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000972 Py_INCREF(a);
973 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000975 size = Py_Size(a) + Py_Size(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000976 if (size < 0) {
977 PyErr_SetString(PyExc_OverflowError,
978 "strings are too large to concat");
979 return NULL;
980 }
981
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000982 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000983 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000984 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000985 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000986 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000987 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000988 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis68192102007-07-21 06:55:02 +0000989 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
990 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000991 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000992 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993#undef b
994}
995
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000997string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000999 register Py_ssize_t i;
1000 register Py_ssize_t j;
1001 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001002 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001003 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001004 if (n < 0)
1005 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001006 /* watch out for overflows: the size can overflow int,
1007 * and the # of bytes needed can overflow size_t
1008 */
Martin v. Löwis68192102007-07-21 06:55:02 +00001009 size = Py_Size(a) * n;
1010 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001011 PyErr_SetString(PyExc_OverflowError,
1012 "repeated string is too long");
1013 return NULL;
1014 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001015 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001016 Py_INCREF(a);
1017 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001018 }
Tim Peterse7c05322004-06-27 17:24:49 +00001019 nbytes = (size_t)size;
1020 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001021 PyErr_SetString(PyExc_OverflowError,
1022 "repeated string is too long");
1023 return NULL;
1024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001025 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001026 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001027 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001028 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001029 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001030 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001031 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001032 op->ob_sval[size] = '\0';
Martin v. Löwis68192102007-07-21 06:55:02 +00001033 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001034 memset(op->ob_sval, a->ob_sval[0] , n);
1035 return (PyObject *) op;
1036 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001037 i = 0;
1038 if (i < size) {
Martin v. Löwis68192102007-07-21 06:55:02 +00001039 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1040 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001041 }
1042 while (i < size) {
1043 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001044 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001045 i += j;
1046 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001047 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048}
1049
1050/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1051
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001052static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001053string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001054 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001055 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001056{
1057 if (i < 0)
1058 i = 0;
1059 if (j < 0)
1060 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis68192102007-07-21 06:55:02 +00001061 if (j > Py_Size(a))
1062 j = Py_Size(a);
1063 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001064 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001065 Py_INCREF(a);
1066 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067 }
1068 if (j < i)
1069 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001070 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001071}
1072
Guido van Rossum9284a572000-03-07 15:53:43 +00001073static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001074string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001075{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001076 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001077#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001078 if (PyUnicode_Check(sub_obj))
1079 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001080#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001082 PyErr_Format(PyExc_TypeError,
1083 "'in <string>' requires string as left operand, "
Martin v. Löwis68192102007-07-21 06:55:02 +00001084 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001085 return -1;
1086 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001087 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001088
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001089 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001090}
1091
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001092static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001093string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001094{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001095 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001096 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +00001097 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001098 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001099 return NULL;
1100 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001101 pchar = a->ob_sval[i];
1102 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001103 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001104 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001105 else {
1106#ifdef COUNT_ALLOCS
1107 one_strings++;
1108#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001109 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001110 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001111 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001112}
1113
Martin v. Löwiscd353062001-05-24 16:56:35 +00001114static PyObject*
1115string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001116{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001117 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001118 Py_ssize_t len_a, len_b;
1119 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001120 PyObject *result;
1121
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001122 /* Make sure both arguments are strings. */
1123 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001124 result = Py_NotImplemented;
1125 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001126 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001127 if (a == b) {
1128 switch (op) {
1129 case Py_EQ:case Py_LE:case Py_GE:
1130 result = Py_True;
1131 goto out;
1132 case Py_NE:case Py_LT:case Py_GT:
1133 result = Py_False;
1134 goto out;
1135 }
1136 }
1137 if (op == Py_EQ) {
1138 /* Supporting Py_NE here as well does not save
1139 much time, since Py_NE is rarely used. */
Martin v. Löwis68192102007-07-21 06:55:02 +00001140 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001141 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis68192102007-07-21 06:55:02 +00001142 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001143 result = Py_True;
1144 } else {
1145 result = Py_False;
1146 }
1147 goto out;
1148 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001149 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001150 min_len = (len_a < len_b) ? len_a : len_b;
1151 if (min_len > 0) {
1152 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1153 if (c==0)
1154 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001155 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001156 c = 0;
1157 if (c == 0)
1158 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1159 switch (op) {
1160 case Py_LT: c = c < 0; break;
1161 case Py_LE: c = c <= 0; break;
1162 case Py_EQ: assert(0); break; /* unreachable */
1163 case Py_NE: c = c != 0; break;
1164 case Py_GT: c = c > 0; break;
1165 case Py_GE: c = c >= 0; break;
1166 default:
1167 result = Py_NotImplemented;
1168 goto out;
1169 }
1170 result = c ? Py_True : Py_False;
1171 out:
1172 Py_INCREF(result);
1173 return result;
1174}
1175
1176int
1177_PyString_Eq(PyObject *o1, PyObject *o2)
1178{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001179 PyStringObject *a = (PyStringObject*) o1;
1180 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis68192102007-07-21 06:55:02 +00001181 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001182 && *a->ob_sval == *b->ob_sval
Martin v. Löwis68192102007-07-21 06:55:02 +00001183 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001184}
1185
Guido van Rossum9bfef441993-03-29 10:43:31 +00001186static long
Fred Drakeba096332000-07-09 07:04:36 +00001187string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001188{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001189 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001190 register unsigned char *p;
1191 register long x;
1192
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001193 if (a->ob_shash != -1)
1194 return a->ob_shash;
Martin v. Löwis68192102007-07-21 06:55:02 +00001195 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001196 p = (unsigned char *) a->ob_sval;
1197 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001198 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001199 x = (1000003*x) ^ *p++;
Martin v. Löwis68192102007-07-21 06:55:02 +00001200 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001201 if (x == -1)
1202 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001203 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001204 return x;
1205}
1206
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001207static PyObject*
1208string_subscript(PyStringObject* self, PyObject* item)
1209{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001210 if (PyIndex_Check(item)) {
1211 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001212 if (i == -1 && PyErr_Occurred())
1213 return NULL;
1214 if (i < 0)
1215 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001216 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217 }
1218 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001219 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 char* source_buf;
1221 char* result_buf;
1222 PyObject* result;
1223
Tim Petersae1d0c92006-03-17 03:29:34 +00001224 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001225 PyString_GET_SIZE(self),
1226 &start, &stop, &step, &slicelength) < 0) {
1227 return NULL;
1228 }
1229
1230 if (slicelength <= 0) {
1231 return PyString_FromStringAndSize("", 0);
1232 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001233 else if (start == 0 && step == 1 &&
1234 slicelength == PyString_GET_SIZE(self) &&
1235 PyString_CheckExact(self)) {
1236 Py_INCREF(self);
1237 return (PyObject *)self;
1238 }
1239 else if (step == 1) {
1240 return PyString_FromStringAndSize(
1241 PyString_AS_STRING(self) + start,
1242 slicelength);
1243 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 else {
1245 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001246 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001247 if (result_buf == NULL)
1248 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001249
Tim Petersae1d0c92006-03-17 03:29:34 +00001250 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001251 cur += step, i++) {
1252 result_buf[i] = source_buf[cur];
1253 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001254
1255 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001256 slicelength);
1257 PyMem_Free(result_buf);
1258 return result;
1259 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001260 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001261 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001262 PyErr_Format(PyExc_TypeError,
1263 "string indices must be integers, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00001264 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001265 return NULL;
1266 }
1267}
1268
Martin v. Löwis18e16552006-02-15 17:27:45 +00001269static Py_ssize_t
1270string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001271{
1272 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001273 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001274 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001275 return -1;
1276 }
1277 *ptr = (void *)self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001278 return Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001279}
1280
Martin v. Löwis18e16552006-02-15 17:27:45 +00001281static Py_ssize_t
1282string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001283{
Guido van Rossum045e6881997-09-08 18:30:11 +00001284 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001285 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001286 return -1;
1287}
1288
Martin v. Löwis18e16552006-02-15 17:27:45 +00001289static Py_ssize_t
1290string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001291{
1292 if ( lenp )
Martin v. Löwis68192102007-07-21 06:55:02 +00001293 *lenp = Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001294 return 1;
1295}
1296
Martin v. Löwis18e16552006-02-15 17:27:45 +00001297static Py_ssize_t
1298string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001299{
1300 if ( index != 0 ) {
1301 PyErr_SetString(PyExc_SystemError,
1302 "accessing non-existent string segment");
1303 return -1;
1304 }
1305 *ptr = self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001306 return Py_Size(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001307}
1308
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001309static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001310 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001311 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001312 (ssizeargfunc)string_repeat, /*sq_repeat*/
1313 (ssizeargfunc)string_item, /*sq_item*/
1314 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001315 0, /*sq_ass_item*/
1316 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001317 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001318};
1319
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001320static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001321 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001322 (binaryfunc)string_subscript,
1323 0,
1324};
1325
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001326static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001327 (readbufferproc)string_buffer_getreadbuf,
1328 (writebufferproc)string_buffer_getwritebuf,
1329 (segcountproc)string_buffer_getsegcount,
1330 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001331};
1332
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333
1334
/* Selectors for the three strip variants; they double as indexes into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name for strip variant `i`: the format string with its
   three-character "|O:" prefix skipped.  The argument is parenthesized
   so that an expression argument indexes correctly. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001343
Andrew Dalke525eab32006-05-26 14:00:45 +00001344
/* True iff `length` bytes of `target` starting at `offset` equal
   `pattern`.  The first and last bytes are compared directly before
   memcmp() is called on the middle.
   Don't call if length < 2.
   Arguments are parenthesized so expression arguments expand safely;
   they are still evaluated more than once, so avoid side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                 \
    ((target)[(offset)] == (pattern)[0] &&                               \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&           \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1350
1351
Andrew Dalke525eab32006-05-26 14:00:45 +00001352/* Overallocate the initial list to reduce the number of reallocs for small
1353 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1354 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1355 text (roughly 11 words per line) and field delimited data (usually 1-10
1356 fields). For large strings the split algorithms are bandwidth limited
1357 so increasing the preallocation likely will not improve things.*/
1358
/* Upper bound on the number of list slots allocated up front. */
#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that an expression
   argument expands safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1364
/* Append the substring data[left:right] to `list`.
   Uses the caller's `str` and `list` variables and jumps to the
   caller's `onError` label on failure.  The temporary reference held
   in `str` is released on both the success and the failure path. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    Py_DECREF(str);
1376
/* Add the substring data[left:right] as element number `count` of
   `list`, then increment `count`.  While count < MAX_PREALLOC the item
   is stored directly into the slot with PyList_SET_ITEM; after that it
   is appended.  Uses the caller's `str`, `list` and `count` variables
   and jumps to the caller's `onError` label on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    }                                                           \
    else {                                                      \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001393
/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Cursor helpers over the byte buffer `s`: advance `i` forward (bounded
   by `len`) or backward (bounded by 0) across whitespace or
   non-whitespace.  Arguments are parenthesized so expression arguments
   expand safely; `i` must be a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1401
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001402Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001403split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404{
Andrew Dalke525eab32006-05-26 14:00:45 +00001405 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001406 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001407 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408
1409 if (list == NULL)
1410 return NULL;
1411
Andrew Dalke02758d62006-05-26 15:21:01 +00001412 i = j = 0;
1413
1414 while (maxsplit-- > 0) {
1415 SKIP_SPACE(s, i, len);
1416 if (i==len) break;
1417 j = i; i++;
1418 SKIP_NONSPACE(s, i, len);
1419 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001421
1422 if (i < len) {
1423 /* Only occurs when maxsplit was reached */
1424 /* Skip any remaining whitespace and copy to end of string */
1425 SKIP_SPACE(s, i, len);
1426 if (i != len)
1427 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001428 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001429 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001430 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001431 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432 Py_DECREF(list);
1433 return NULL;
1434}
1435
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001436Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001437split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001438{
Andrew Dalke525eab32006-05-26 14:00:45 +00001439 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001440 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001441 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442
1443 if (list == NULL)
1444 return NULL;
1445
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001446 i = j = 0;
1447 while ((j < len) && (maxcount-- > 0)) {
1448 for(; j<len; j++) {
1449 /* I found that using memchr makes no difference */
1450 if (s[j] == ch) {
1451 SPLIT_ADD(s, i, j);
1452 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001453 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001454 }
1455 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001456 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001457 if (i <= len) {
1458 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001459 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001460 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001461 return list;
1462
1463 onError:
1464 Py_DECREF(list);
1465 return NULL;
1466}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001467
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001468PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469"S.split([sep [,maxsplit]]) -> list of strings\n\
1470\n\
1471Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001473splits are done. If sep is not specified or is None, any\n\
1474whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475
1476static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001477string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001479 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001480 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001482 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001483#ifdef USE_FAST
1484 Py_ssize_t pos;
1485#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001486
Martin v. Löwis9c830762006-04-13 08:37:17 +00001487 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001489 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001490 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001491 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001493 if (PyString_Check(subobj)) {
1494 sub = PyString_AS_STRING(subobj);
1495 n = PyString_GET_SIZE(subobj);
1496 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001497#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001498 else if (PyUnicode_Check(subobj))
1499 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001500#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001501 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1502 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001503
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 if (n == 0) {
1505 PyErr_SetString(PyExc_ValueError, "empty separator");
1506 return NULL;
1507 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001508 else if (n == 1)
1509 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001510
Andrew Dalke525eab32006-05-26 14:00:45 +00001511 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001512 if (list == NULL)
1513 return NULL;
1514
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001515#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001517 while (maxsplit-- > 0) {
1518 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1519 if (pos < 0)
1520 break;
1521 j = i+pos;
1522 SPLIT_ADD(s, i, j);
1523 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001525#else
1526 i = j = 0;
1527 while ((j+n <= len) && (maxsplit-- > 0)) {
1528 for (; j+n <= len; j++) {
1529 if (Py_STRING_MATCH(s, j, sub, n)) {
1530 SPLIT_ADD(s, i, j);
1531 i = j = j + n;
1532 break;
1533 }
1534 }
1535 }
1536#endif
1537 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001538 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001539 return list;
1540
Andrew Dalke525eab32006-05-26 14:00:45 +00001541 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542 Py_DECREF(list);
1543 return NULL;
1544}
1545
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001546PyDoc_STRVAR(partition__doc__,
1547"S.partition(sep) -> (head, sep, tail)\n\
1548\n\
1549Searches for the separator sep in S, and returns the part before it,\n\
1550the separator itself, and the part after it. If the separator is not\n\
1551found, returns S and two empty strings.");
1552
1553static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001554string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001555{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001556 const char *sep;
1557 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001558
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001559 if (PyString_Check(sep_obj)) {
1560 sep = PyString_AS_STRING(sep_obj);
1561 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001562 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001563#ifdef Py_USING_UNICODE
1564 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001565 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001566#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001567 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001568 return NULL;
1569
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001570 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001571 (PyObject*) self,
1572 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1573 sep_obj, sep, sep_len
1574 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001575}
1576
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001577PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001578"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001579\n\
1580Searches for the separator sep in S, starting at the end of S, and returns\n\
1581the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001582separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001583
1584static PyObject *
1585string_rpartition(PyStringObject *self, PyObject *sep_obj)
1586{
1587 const char *sep;
1588 Py_ssize_t sep_len;
1589
1590 if (PyString_Check(sep_obj)) {
1591 sep = PyString_AS_STRING(sep_obj);
1592 sep_len = PyString_GET_SIZE(sep_obj);
1593 }
1594#ifdef Py_USING_UNICODE
1595 else if (PyUnicode_Check(sep_obj))
1596 return PyUnicode_Partition((PyObject *) self, sep_obj);
1597#endif
1598 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1599 return NULL;
1600
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001601 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001602 (PyObject*) self,
1603 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1604 sep_obj, sep, sep_len
1605 );
1606}
1607
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001608Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001609rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001610{
Andrew Dalke525eab32006-05-26 14:00:45 +00001611 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001612 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001613 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001614
1615 if (list == NULL)
1616 return NULL;
1617
Andrew Dalke02758d62006-05-26 15:21:01 +00001618 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001619
Andrew Dalke02758d62006-05-26 15:21:01 +00001620 while (maxsplit-- > 0) {
1621 RSKIP_SPACE(s, i);
1622 if (i<0) break;
1623 j = i; i--;
1624 RSKIP_NONSPACE(s, i);
1625 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001626 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001627 if (i >= 0) {
1628 /* Only occurs when maxsplit was reached */
1629 /* Skip any remaining whitespace and copy to beginning of string */
1630 RSKIP_SPACE(s, i);
1631 if (i >= 0)
1632 SPLIT_ADD(s, 0, i + 1);
1633
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001634 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001635 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001636 if (PyList_Reverse(list) < 0)
1637 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001638 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001639 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001640 Py_DECREF(list);
1641 return NULL;
1642}
1643
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001644Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001645rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001646{
Andrew Dalke525eab32006-05-26 14:00:45 +00001647 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001648 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001649 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001650
1651 if (list == NULL)
1652 return NULL;
1653
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001654 i = j = len - 1;
1655 while ((i >= 0) && (maxcount-- > 0)) {
1656 for (; i >= 0; i--) {
1657 if (s[i] == ch) {
1658 SPLIT_ADD(s, i + 1, j + 1);
1659 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001660 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001661 }
1662 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001663 }
1664 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001665 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001666 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001667 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001668 if (PyList_Reverse(list) < 0)
1669 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001670 return list;
1671
1672 onError:
1673 Py_DECREF(list);
1674 return NULL;
1675}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676
1677PyDoc_STRVAR(rsplit__doc__,
1678"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1679\n\
1680Return a list of the words in the string S, using sep as the\n\
1681delimiter string, starting at the end of the string and working\n\
1682to the front. If maxsplit is given, at most maxsplit splits are\n\
1683done. If sep is not specified or is None, any whitespace string\n\
1684is a separator.");
1685
1686static PyObject *
1687string_rsplit(PyStringObject *self, PyObject *args)
1688{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001689 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001690 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001691 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001692 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001693
Martin v. Löwis9c830762006-04-13 08:37:17 +00001694 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001695 return NULL;
1696 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001697 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001698 if (subobj == Py_None)
1699 return rsplit_whitespace(s, len, maxsplit);
1700 if (PyString_Check(subobj)) {
1701 sub = PyString_AS_STRING(subobj);
1702 n = PyString_GET_SIZE(subobj);
1703 }
1704#ifdef Py_USING_UNICODE
1705 else if (PyUnicode_Check(subobj))
1706 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1707#endif
1708 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1709 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001710
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001711 if (n == 0) {
1712 PyErr_SetString(PyExc_ValueError, "empty separator");
1713 return NULL;
1714 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001715 else if (n == 1)
1716 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001717
Andrew Dalke525eab32006-05-26 14:00:45 +00001718 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001719 if (list == NULL)
1720 return NULL;
1721
1722 j = len;
1723 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001724
1725 while ( (i >= 0) && (maxsplit-- > 0) ) {
1726 for (; i>=0; i--) {
1727 if (Py_STRING_MATCH(s, i, sub, n)) {
1728 SPLIT_ADD(s, i + n, j);
1729 j = i;
1730 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001731 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001732 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001733 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001734 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001735 SPLIT_ADD(s, 0, j);
1736 FIX_PREALLOC_SIZE(list);
1737 if (PyList_Reverse(list) < 0)
1738 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001739 return list;
1740
Andrew Dalke525eab32006-05-26 14:00:45 +00001741onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001742 Py_DECREF(list);
1743 return NULL;
1744}
1745
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001747PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748"S.join(sequence) -> string\n\
1749\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001750Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001751sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752
1753static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001754string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755{
1756 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001757 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001760 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001761 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001762 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001763 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764
Tim Peters19fe14e2001-01-19 03:03:47 +00001765 seq = PySequence_Fast(orig, "");
1766 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001767 return NULL;
1768 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001769
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001770 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001771 if (seqlen == 0) {
1772 Py_DECREF(seq);
1773 return PyString_FromString("");
1774 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001776 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001777 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1778 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001779 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001780 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001781 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001783
Raymond Hettinger674f2412004-08-23 23:23:54 +00001784 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001785 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001786 * Do a pre-pass to figure out the total amount of space we'll
1787 * need (sz), see whether any argument is absurd, and defer to
1788 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001789 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001790 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001791 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001792 item = PySequence_Fast_GET_ITEM(seq, i);
1793 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001794#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001795 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001796 /* Defer to Unicode join.
1797 * CAUTION: There's no gurantee that the
1798 * original sequence can be iterated over
1799 * again, so we must pass seq here.
1800 */
1801 PyObject *result;
1802 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001803 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001804 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001805 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001806#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001807 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001808 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001809 " %.80s found",
Martin v. Löwis68192102007-07-21 06:55:02 +00001810 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001811 Py_DECREF(seq);
1812 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001813 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001814 sz += PyString_GET_SIZE(item);
1815 if (i != 0)
1816 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001817 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001818 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001819 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001820 Py_DECREF(seq);
1821 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001823 }
1824
1825 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001826 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001827 if (res == NULL) {
1828 Py_DECREF(seq);
1829 return NULL;
1830 }
1831
1832 /* Catenate everything. */
1833 p = PyString_AS_STRING(res);
1834 for (i = 0; i < seqlen; ++i) {
1835 size_t n;
1836 item = PySequence_Fast_GET_ITEM(seq, i);
1837 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001838 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001839 p += n;
1840 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001841 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001842 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001843 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001845
Jeremy Hylton49048292000-07-11 03:28:17 +00001846 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848}
1849
Tim Peters52e155e2001-06-16 05:42:57 +00001850PyObject *
1851_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001852{
Tim Petersa7259592001-06-16 05:11:17 +00001853 assert(sep != NULL && PyString_Check(sep));
1854 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001855 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001856}
1857
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001858Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001859string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001860{
1861 if (*end > len)
1862 *end = len;
1863 else if (*end < 0)
1864 *end += len;
1865 if (*end < 0)
1866 *end = 0;
1867 if (*start < 0)
1868 *start += len;
1869 if (*start < 0)
1870 *start = 0;
1871}
1872
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001873Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001874string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001876 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001877 const char *sub;
1878 Py_ssize_t sub_len;
1879 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001881 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1882 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001883 return -2;
1884 if (PyString_Check(subobj)) {
1885 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001886 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001887 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001888#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001889 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001890 return PyUnicode_Find(
1891 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001892#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001893 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001894 /* XXX - the "expected a character buffer object" is pretty
1895 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896 return -2;
1897
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001898 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001899 return stringlib_find_slice(
1900 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1901 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001902 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001903 return stringlib_rfind_slice(
1904 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1905 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906}
1907
1908
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001909PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910"S.find(sub [,start [,end]]) -> int\n\
1911\n\
1912Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001913such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914arguments start and end are interpreted as in slice notation.\n\
1915\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001916Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917
1918static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001919string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001921 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922 if (result == -2)
1923 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001924 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925}
1926
1927
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001928PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929"S.index(sub [,start [,end]]) -> int\n\
1930\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001931Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932
1933static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001934string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001936 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937 if (result == -2)
1938 return NULL;
1939 if (result == -1) {
1940 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001941 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942 return NULL;
1943 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001944 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949"S.rfind(sub [,start [,end]]) -> int\n\
1950\n\
1951Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001952such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953arguments start and end are interpreted as in slice notation.\n\
1954\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001955Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956
1957static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001958string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001960 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961 if (result == -2)
1962 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001963 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964}
1965
1966
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001967PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968"S.rindex(sub [,start [,end]]) -> int\n\
1969\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001970Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971
1972static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001973string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001975 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976 if (result == -2)
1977 return NULL;
1978 if (result == -1) {
1979 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001980 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981 return NULL;
1982 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001983 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984}
1985
1986
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001987Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001988do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1989{
1990 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001991 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001992 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001993 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1994 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001995
1996 i = 0;
1997 if (striptype != RIGHTSTRIP) {
1998 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1999 i++;
2000 }
2001 }
2002
2003 j = len;
2004 if (striptype != LEFTSTRIP) {
2005 do {
2006 j--;
2007 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2008 j++;
2009 }
2010
2011 if (i == 0 && j == len && PyString_CheckExact(self)) {
2012 Py_INCREF(self);
2013 return (PyObject*)self;
2014 }
2015 else
2016 return PyString_FromStringAndSize(s+i, j-i);
2017}
2018
2019
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002020Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002021do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022{
2023 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002024 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026 i = 0;
2027 if (striptype != RIGHTSTRIP) {
2028 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2029 i++;
2030 }
2031 }
2032
2033 j = len;
2034 if (striptype != LEFTSTRIP) {
2035 do {
2036 j--;
2037 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2038 j++;
2039 }
2040
Tim Peters8fa5dd02001-09-12 02:18:30 +00002041 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042 Py_INCREF(self);
2043 return (PyObject*)self;
2044 }
2045 else
2046 return PyString_FromStringAndSize(s+i, j-i);
2047}
2048
2049
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002050Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002051do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2052{
2053 PyObject *sep = NULL;
2054
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002055 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002056 return NULL;
2057
2058 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002059 if (PyString_Check(sep))
2060 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002061#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002062 else if (PyUnicode_Check(sep)) {
2063 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2064 PyObject *res;
2065 if (uniself==NULL)
2066 return NULL;
2067 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2068 striptype, sep);
2069 Py_DECREF(uniself);
2070 return res;
2071 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002072#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002073 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002074#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002075 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002076#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002077 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002078#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002079 STRIPNAME(striptype));
2080 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002081 }
2082
2083 return do_strip(self, striptype);
2084}
2085
2086
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002087PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002088"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089\n\
2090Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002091whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002092If chars is given and not None, remove characters in chars instead.\n\
2093If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094
2095static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002098 if (PyTuple_GET_SIZE(args) == 0)
2099 return do_strip(self, BOTHSTRIP); /* Common case */
2100 else
2101 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102}
2103
2104
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002105PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002106"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002108Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002109If chars is given and not None, remove characters in chars instead.\n\
2110If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111
2112static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002113string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002115 if (PyTuple_GET_SIZE(args) == 0)
2116 return do_strip(self, LEFTSTRIP); /* Common case */
2117 else
2118 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119}
2120
2121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002122PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002123"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002125Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002126If chars is given and not None, remove characters in chars instead.\n\
2127If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128
2129static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002130string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002132 if (PyTuple_GET_SIZE(args) == 0)
2133 return do_strip(self, RIGHTSTRIP); /* Common case */
2134 else
2135 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136}
2137
2138
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002139PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140"S.lower() -> string\n\
2141\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002142Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002144/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2145#ifndef _tolower
2146#define _tolower tolower
2147#endif
2148
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002150string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002152 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002153 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002154 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002156 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002157 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002159
2160 s = PyString_AS_STRING(newobj);
2161
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002162 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002163
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002165 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002166 if (isupper(c))
2167 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002169
Anthony Baxtera6286212006-04-11 07:42:36 +00002170 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171}
2172
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002173PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174"S.upper() -> string\n\
2175\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002176Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002178#ifndef _toupper
2179#define _toupper toupper
2180#endif
2181
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002183string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002185 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002186 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002187 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002189 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002190 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002192
2193 s = PyString_AS_STRING(newobj);
2194
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002195 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002196
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002198 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002199 if (islower(c))
2200 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002201 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002202
Anthony Baxtera6286212006-04-11 07:42:36 +00002203 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204}
2205
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002206PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207"S.title() -> string\n\
2208\n\
2209Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002210characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002211
2212static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002213string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002214{
2215 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002216 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002217 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002218 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002219
Anthony Baxtera6286212006-04-11 07:42:36 +00002220 newobj = PyString_FromStringAndSize(NULL, n);
2221 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002223 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002224 for (i = 0; i < n; i++) {
2225 int c = Py_CHARMASK(*s++);
2226 if (islower(c)) {
2227 if (!previous_is_cased)
2228 c = toupper(c);
2229 previous_is_cased = 1;
2230 } else if (isupper(c)) {
2231 if (previous_is_cased)
2232 c = tolower(c);
2233 previous_is_cased = 1;
2234 } else
2235 previous_is_cased = 0;
2236 *s_new++ = c;
2237 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002238 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002239}
2240
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002241PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002242"S.capitalize() -> string\n\
2243\n\
2244Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002245capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246
2247static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002248string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002249{
2250 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002251 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002252 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253
Anthony Baxtera6286212006-04-11 07:42:36 +00002254 newobj = PyString_FromStringAndSize(NULL, n);
2255 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002257 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258 if (0 < n) {
2259 int c = Py_CHARMASK(*s++);
2260 if (islower(c))
2261 *s_new = toupper(c);
2262 else
2263 *s_new = c;
2264 s_new++;
2265 }
2266 for (i = 1; i < n; i++) {
2267 int c = Py_CHARMASK(*s++);
2268 if (isupper(c))
2269 *s_new = tolower(c);
2270 else
2271 *s_new = c;
2272 s_new++;
2273 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002274 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275}
2276
2277
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002278PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002279"S.count(sub[, start[, end]]) -> int\n\
2280\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002281Return the number of non-overlapping occurrences of substring sub in\n\
2282string S[start:end]. Optional arguments start and end are interpreted\n\
2283as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002284
2285static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002286string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002288 PyObject *sub_obj;
2289 const char *str = PyString_AS_STRING(self), *sub;
2290 Py_ssize_t sub_len;
2291 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002292
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002293 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2294 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002295 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002296
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002297 if (PyString_Check(sub_obj)) {
2298 sub = PyString_AS_STRING(sub_obj);
2299 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002301#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002302 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002303 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002304 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002305 if (count == -1)
2306 return NULL;
2307 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002308 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002309 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002310#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002311 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002312 return NULL;
2313
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002314 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002315
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002316 return PyInt_FromSsize_t(
2317 stringlib_count(str + start, end - start, sub, sub_len)
2318 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319}
2320
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002321PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002322"S.swapcase() -> string\n\
2323\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002325converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326
2327static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002328string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002329{
2330 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002331 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002332 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333
Anthony Baxtera6286212006-04-11 07:42:36 +00002334 newobj = PyString_FromStringAndSize(NULL, n);
2335 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002337 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338 for (i = 0; i < n; i++) {
2339 int c = Py_CHARMASK(*s++);
2340 if (islower(c)) {
2341 *s_new = toupper(c);
2342 }
2343 else if (isupper(c)) {
2344 *s_new = tolower(c);
2345 }
2346 else
2347 *s_new = c;
2348 s_new++;
2349 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002350 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351}
2352
2353
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002354PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355"S.translate(table [,deletechars]) -> string\n\
2356\n\
2357Return a copy of the string S, where all characters occurring\n\
2358in the optional argument deletechars are removed, and the\n\
2359remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002360translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361
2362static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002363string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002366 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002367 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002369 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002370 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002371 PyObject *result;
2372 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002373 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002375 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002376 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378
2379 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002380 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381 tablen = PyString_GET_SIZE(tableobj);
2382 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002383 else if (tableobj == Py_None) {
2384 table = NULL;
2385 tablen = 256;
2386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002387#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002389 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390 parameter; instead a mapping to None will cause characters
2391 to be deleted. */
2392 if (delobj != NULL) {
2393 PyErr_SetString(PyExc_TypeError,
2394 "deletions are implemented differently for unicode");
2395 return NULL;
2396 }
2397 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2398 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002399#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002400 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002401 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402
Martin v. Löwis00b61272002-12-12 20:03:19 +00002403 if (tablen != 256) {
2404 PyErr_SetString(PyExc_ValueError,
2405 "translation table must be 256 characters long");
2406 return NULL;
2407 }
2408
Guido van Rossum4c08d552000-03-10 22:55:18 +00002409 if (delobj != NULL) {
2410 if (PyString_Check(delobj)) {
2411 del_table = PyString_AS_STRING(delobj);
2412 dellen = PyString_GET_SIZE(delobj);
2413 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002414#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002415 else if (PyUnicode_Check(delobj)) {
2416 PyErr_SetString(PyExc_TypeError,
2417 "deletions are implemented differently for unicode");
2418 return NULL;
2419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002420#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002421 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2422 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002423 }
2424 else {
2425 del_table = NULL;
2426 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427 }
2428
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002429 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430 result = PyString_FromStringAndSize((char *)NULL, inlen);
2431 if (result == NULL)
2432 return NULL;
2433 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002434 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002435
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002436 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437 /* If no deletions are required, use faster code */
2438 for (i = inlen; --i >= 0; ) {
2439 c = Py_CHARMASK(*input++);
2440 if (Py_CHARMASK((*output++ = table[c])) != c)
2441 changed = 1;
2442 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002443 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444 return result;
2445 Py_DECREF(result);
2446 Py_INCREF(input_obj);
2447 return input_obj;
2448 }
2449
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002450 if (table == NULL) {
2451 for (i = 0; i < 256; i++)
2452 trans_table[i] = Py_CHARMASK(i);
2453 } else {
2454 for (i = 0; i < 256; i++)
2455 trans_table[i] = Py_CHARMASK(table[i]);
2456 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002457
2458 for (i = 0; i < dellen; i++)
2459 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2460
2461 for (i = inlen; --i >= 0; ) {
2462 c = Py_CHARMASK(*input++);
2463 if (trans_table[c] != -1)
2464 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2465 continue;
2466 changed = 1;
2467 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002468 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469 Py_DECREF(result);
2470 Py_INCREF(input_obj);
2471 return input_obj;
2472 }
2473 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002474 if (inlen > 0)
2475 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476 return result;
2477}
2478
2479
/* Search direction selectors passed to findstring()/countstring(). */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c in the first target_len bytes of target;
   evaluates to a char * to the match, or NULL if there is none. */
#define findchar(target, target_len, c)				\
	((char *)memchr((const void *)(target), (c), (target_len)))
2487
2488/* String ops must return a string. */
2489/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002490Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002491return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002493 if (PyString_CheckExact(self)) {
2494 Py_INCREF(self);
2495 return self;
2496 }
2497 return (PyStringObject *)PyString_FromStringAndSize(
2498 PyString_AS_STRING(self),
2499 PyString_GET_SIZE(self));
2500}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002502Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002503countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002504{
2505 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002506 const char *start=target;
2507 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002508
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002509 while ( (start=findchar(start, end-start, c)) != NULL ) {
2510 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002511 if (count >= maxcount)
2512 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002513 start += 1;
2514 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002515 return count;
2516}
2517
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002518Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002519findstring(const char *target, Py_ssize_t target_len,
2520 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002521 Py_ssize_t start,
2522 Py_ssize_t end,
2523 int direction)
2524{
2525 if (start < 0) {
2526 start += target_len;
2527 if (start < 0)
2528 start = 0;
2529 }
2530 if (end > target_len) {
2531 end = target_len;
2532 } else if (end < 0) {
2533 end += target_len;
2534 if (end < 0)
2535 end = 0;
2536 }
2537
2538 /* zero-length substrings always match at the first attempt */
2539 if (pattern_len == 0)
2540 return (direction > 0) ? start : end;
2541
2542 end -= pattern_len;
2543
2544 if (direction < 0) {
2545 for (; end >= start; end--)
2546 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2547 return end;
2548 } else {
2549 for (; start <= end; start++)
2550 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2551 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002552 }
2553 return -1;
2554}
2555
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002556Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002557countstring(const char *target, Py_ssize_t target_len,
2558 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002559 Py_ssize_t start,
2560 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002561 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002562{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002564
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002565 if (start < 0) {
2566 start += target_len;
2567 if (start < 0)
2568 start = 0;
2569 }
2570 if (end > target_len) {
2571 end = target_len;
2572 } else if (end < 0) {
2573 end += target_len;
2574 if (end < 0)
2575 end = 0;
2576 }
2577
2578 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002579 if (pattern_len == 0 || maxcount == 0) {
2580 if (target_len+1 < maxcount)
2581 return target_len+1;
2582 return maxcount;
2583 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002584
2585 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002586 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002587 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002588 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2589 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002590 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591 end -= pattern_len-1;
2592 }
2593 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002594 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002595 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2596 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002597 if (--maxcount <= 0)
2598 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002599 start += pattern_len-1;
2600 }
2601 }
2602 return count;
2603}
2604
2605
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002606/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002607
2608/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002609Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002610replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002611 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002612 Py_ssize_t maxcount)
2613{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002614 char *self_s, *result_s;
2615 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002616 Py_ssize_t count, i, product;
2617 PyStringObject *result;
2618
2619 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002620
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002621 /* 1 at the end plus 1 after every character */
2622 count = self_len+1;
2623 if (maxcount < count)
2624 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002625
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002626 /* Check for overflow */
2627 /* result_len = count * to_len + self_len; */
2628 product = count * to_len;
2629 if (product / to_len != count) {
2630 PyErr_SetString(PyExc_OverflowError,
2631 "replace string is too long");
2632 return NULL;
2633 }
2634 result_len = product + self_len;
2635 if (result_len < 0) {
2636 PyErr_SetString(PyExc_OverflowError,
2637 "replace string is too long");
2638 return NULL;
2639 }
2640
2641 if (! (result = (PyStringObject *)
2642 PyString_FromStringAndSize(NULL, result_len)) )
2643 return NULL;
2644
2645 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002646 result_s = PyString_AS_STRING(result);
2647
2648 /* TODO: special case single character, which doesn't need memcpy */
2649
2650 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002651 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002652 result_s += to_len;
2653 count -= 1;
2654
2655 for (i=0; i<count; i++) {
2656 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002657 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002658 result_s += to_len;
2659 }
2660
2661 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002662 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002663
2664 return result;
2665}
2666
2667/* Special case for deleting a single character */
2668/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002669Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002670replace_delete_single_character(PyStringObject *self,
2671 char from_c, Py_ssize_t maxcount)
2672{
2673 char *self_s, *result_s;
2674 char *start, *next, *end;
2675 Py_ssize_t self_len, result_len;
2676 Py_ssize_t count;
2677 PyStringObject *result;
2678
2679 self_len = PyString_GET_SIZE(self);
2680 self_s = PyString_AS_STRING(self);
2681
Andrew Dalke51324072006-05-26 20:25:22 +00002682 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002683 if (count == 0) {
2684 return return_self(self);
2685 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002686
2687 result_len = self_len - count; /* from_len == 1 */
2688 assert(result_len>=0);
2689
2690 if ( (result = (PyStringObject *)
2691 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2692 return NULL;
2693 result_s = PyString_AS_STRING(result);
2694
2695 start = self_s;
2696 end = self_s + self_len;
2697 while (count-- > 0) {
2698 next = findchar(start, end-start, from_c);
2699 if (next == NULL)
2700 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002701 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002702 result_s += (next-start);
2703 start = next+1;
2704 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002705 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002706
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002707 return result;
2708}
2709
2710/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2711
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002712Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002713replace_delete_substring(PyStringObject *self,
2714 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002715 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002716 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002717 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002718 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002719 Py_ssize_t count, offset;
2720 PyStringObject *result;
2721
2722 self_len = PyString_GET_SIZE(self);
2723 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002724
2725 count = countstring(self_s, self_len,
2726 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002727 0, self_len, 1,
2728 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002729
2730 if (count == 0) {
2731 /* no matches */
2732 return return_self(self);
2733 }
2734
2735 result_len = self_len - (count * from_len);
2736 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002737
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002738 if ( (result = (PyStringObject *)
2739 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2740 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002741
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002742 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002743
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744 start = self_s;
2745 end = self_s + self_len;
2746 while (count-- > 0) {
2747 offset = findstring(start, end-start,
2748 from_s, from_len,
2749 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002750 if (offset == -1)
2751 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002752 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002753
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002754 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002755
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002756 result_s += (next-start);
2757 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002758 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002759 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002760 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002761}
2762
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002763/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002764Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765replace_single_character_in_place(PyStringObject *self,
2766 char from_c, char to_c,
2767 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002768{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002769 char *self_s, *result_s, *start, *end, *next;
2770 Py_ssize_t self_len;
2771 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002772
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002773 /* The result string will be the same size */
2774 self_s = PyString_AS_STRING(self);
2775 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002776
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002777 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002778
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002779 if (next == NULL) {
2780 /* No matches; return the original string */
2781 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002782 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002783
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002784 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002785 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002786 if (result == NULL)
2787 return NULL;
2788 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002789 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002790
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002791 /* change everything in-place, starting with this one */
2792 start = result_s + (next-self_s);
2793 *start = to_c;
2794 start++;
2795 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002796
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002797 while (--maxcount > 0) {
2798 next = findchar(start, end-start, from_c);
2799 if (next == NULL)
2800 break;
2801 *next = to_c;
2802 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002803 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002804
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002805 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002806}
2807
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002809Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002811 const char *from_s, Py_ssize_t from_len,
2812 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002813 Py_ssize_t maxcount)
2814{
2815 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002816 char *self_s;
2817 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002818 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002819
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002820 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002821
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002822 self_s = PyString_AS_STRING(self);
2823 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002824
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002825 offset = findstring(self_s, self_len,
2826 from_s, from_len,
2827 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002828 if (offset == -1) {
2829 /* No matches; return the original string */
2830 return return_self(self);
2831 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002832
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002833 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002834 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002835 if (result == NULL)
2836 return NULL;
2837 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002838 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002839
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002840 /* change everything in-place, starting with this one */
2841 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002842 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002843 start += from_len;
2844 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002845
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002846 while ( --maxcount > 0) {
2847 offset = findstring(start, end-start,
2848 from_s, from_len,
2849 0, end-start, FORWARD);
2850 if (offset==-1)
2851 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002852 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002853 start += offset+from_len;
2854 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002855
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002856 return result;
2857}
2858
2859/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002860Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002861replace_single_character(PyStringObject *self,
2862 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002863 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002864 Py_ssize_t maxcount)
2865{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002866 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002868 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002869 Py_ssize_t count, product;
2870 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002871
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002872 self_s = PyString_AS_STRING(self);
2873 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002874
Andrew Dalke51324072006-05-26 20:25:22 +00002875 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002876 if (count == 0) {
2877 /* no matches, return unchanged */
2878 return return_self(self);
2879 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002880
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002881 /* use the difference between current and new, hence the "-1" */
2882 /* result_len = self_len + count * (to_len-1) */
2883 product = count * (to_len-1);
2884 if (product / (to_len-1) != count) {
2885 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2886 return NULL;
2887 }
2888 result_len = self_len + product;
2889 if (result_len < 0) {
2890 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2891 return NULL;
2892 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002893
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002894 if ( (result = (PyStringObject *)
2895 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2896 return NULL;
2897 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002898
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002899 start = self_s;
2900 end = self_s + self_len;
2901 while (count-- > 0) {
2902 next = findchar(start, end-start, from_c);
2903 if (next == NULL)
2904 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002905
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002906 if (next == start) {
2907 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002908 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002909 result_s += to_len;
2910 start += 1;
2911 } else {
2912 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002913 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002914 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002915 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002916 result_s += to_len;
2917 start = next+1;
2918 }
2919 }
2920 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002921 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002922
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002923 return result;
2924}
2925
2926/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002927Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002928replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002929 const char *from_s, Py_ssize_t from_len,
2930 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002931 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002932 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002933 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002934 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002935 Py_ssize_t count, offset, product;
2936 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002937
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002938 self_s = PyString_AS_STRING(self);
2939 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002940
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002941 count = countstring(self_s, self_len,
2942 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002943 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002944 if (count == 0) {
2945 /* no matches, return unchanged */
2946 return return_self(self);
2947 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002948
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002949 /* Check for overflow */
2950 /* result_len = self_len + count * (to_len-from_len) */
2951 product = count * (to_len-from_len);
2952 if (product / (to_len-from_len) != count) {
2953 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2954 return NULL;
2955 }
2956 result_len = self_len + product;
2957 if (result_len < 0) {
2958 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2959 return NULL;
2960 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002961
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002962 if ( (result = (PyStringObject *)
2963 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2964 return NULL;
2965 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002966
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002967 start = self_s;
2968 end = self_s + self_len;
2969 while (count-- > 0) {
2970 offset = findstring(start, end-start,
2971 from_s, from_len,
2972 0, end-start, FORWARD);
2973 if (offset == -1)
2974 break;
2975 next = start+offset;
2976 if (next == start) {
2977 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002978 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002979 result_s += to_len;
2980 start += from_len;
2981 } else {
2982 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002983 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002984 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002985 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002986 result_s += to_len;
2987 start = next+from_len;
2988 }
2989 }
2990 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002991 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002992
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002993 return result;
2994}
2995
2996
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002997Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002998replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002999 const char *from_s, Py_ssize_t from_len,
3000 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003001 Py_ssize_t maxcount)
3002{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003003 if (maxcount < 0) {
3004 maxcount = PY_SSIZE_T_MAX;
3005 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3006 /* nothing to do; return the original string */
3007 return return_self(self);
3008 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003009
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003010 if (maxcount == 0 ||
3011 (from_len == 0 && to_len == 0)) {
3012 /* nothing to do; return the original string */
3013 return return_self(self);
3014 }
3015
3016 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003017
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003018 if (from_len == 0) {
3019 /* insert the 'to' string everywhere. */
3020 /* >>> "Python".replace("", ".") */
3021 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003022 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003023 }
3024
3025 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3026 /* point for an empty self string to generate a non-empty string */
3027 /* Special case so the remaining code always gets a non-empty string */
3028 if (PyString_GET_SIZE(self) == 0) {
3029 return return_self(self);
3030 }
3031
3032 if (to_len == 0) {
3033 /* delete all occurances of 'from' string */
3034 if (from_len == 1) {
3035 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003036 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003037 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003038 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003039 }
3040 }
3041
3042 /* Handle special case where both strings have the same length */
3043
3044 if (from_len == to_len) {
3045 if (from_len == 1) {
3046 return replace_single_character_in_place(
3047 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003048 from_s[0],
3049 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003050 maxcount);
3051 } else {
3052 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003053 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003054 }
3055 }
3056
3057 /* Otherwise use the more generic algorithms */
3058 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003059 return replace_single_character(self, from_s[0],
3060 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003061 } else {
3062 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003063 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003064 }
3065}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003066
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003067PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003068"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003069\n\
3070Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003071old replaced by new. If the optional argument count is\n\
3072given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003073
3074static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003075string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003076{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003077 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003078 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003079 const char *from_s, *to_s;
3080 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003081
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003082 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003083 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003084
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003085 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003086 from_s = PyString_AS_STRING(from);
3087 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003088 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003089#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003090 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003091 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003092 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003093#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003094 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003095 return NULL;
3096
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003097 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003098 to_s = PyString_AS_STRING(to);
3099 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003100 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003101#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003102 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003103 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003104 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003105#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003106 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003107 return NULL;
3108
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003109 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003110 from_s, from_len,
3111 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003112}
3113
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003114/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003115
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003116/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003117 * against substr, using the start and end arguments. Returns
3118 * -1 on error, 0 if not found and 1 if found.
3119 */
3120Py_LOCAL(int)
3121_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3122 Py_ssize_t end, int direction)
3123{
3124 Py_ssize_t len = PyString_GET_SIZE(self);
3125 Py_ssize_t slen;
3126 const char* sub;
3127 const char* str;
3128
3129 if (PyString_Check(substr)) {
3130 sub = PyString_AS_STRING(substr);
3131 slen = PyString_GET_SIZE(substr);
3132 }
3133#ifdef Py_USING_UNICODE
3134 else if (PyUnicode_Check(substr))
3135 return PyUnicode_Tailmatch((PyObject *)self,
3136 substr, start, end, direction);
3137#endif
3138 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3139 return -1;
3140 str = PyString_AS_STRING(self);
3141
3142 string_adjust_indices(&start, &end, len);
3143
3144 if (direction < 0) {
3145 /* startswith */
3146 if (start+slen > len)
3147 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003148 } else {
3149 /* endswith */
3150 if (end-start < slen || start > len)
3151 return 0;
3152
3153 if (end-slen > start)
3154 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003155 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003156 if (end-start >= slen)
3157 return ! memcmp(str+start, sub, slen);
3158 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003159}
3160
3161
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003162PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003163"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003164\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003165Return True if S starts with the specified prefix, False otherwise.\n\
3166With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003167With optional end, stop comparing S at that position.\n\
3168prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003169
3170static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003171string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003172{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003173 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003174 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003175 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003176 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003177
Guido van Rossumc6821402000-05-08 14:08:05 +00003178 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3179 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003180 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003181 if (PyTuple_Check(subobj)) {
3182 Py_ssize_t i;
3183 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3184 result = _string_tailmatch(self,
3185 PyTuple_GET_ITEM(subobj, i),
3186 start, end, -1);
3187 if (result == -1)
3188 return NULL;
3189 else if (result) {
3190 Py_RETURN_TRUE;
3191 }
3192 }
3193 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003194 }
Georg Brandl24250812006-06-09 18:45:48 +00003195 result = _string_tailmatch(self, subobj, start, end, -1);
3196 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003197 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003198 else
Georg Brandl24250812006-06-09 18:45:48 +00003199 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003200}
3201
3202
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003203PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003204"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003205\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003206Return True if S ends with the specified suffix, False otherwise.\n\
3207With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003208With optional end, stop comparing S at that position.\n\
3209suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003210
3211static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003212string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003214 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003215 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003216 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003217 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003218
Guido van Rossumc6821402000-05-08 14:08:05 +00003219 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3220 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003221 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003222 if (PyTuple_Check(subobj)) {
3223 Py_ssize_t i;
3224 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3225 result = _string_tailmatch(self,
3226 PyTuple_GET_ITEM(subobj, i),
3227 start, end, +1);
3228 if (result == -1)
3229 return NULL;
3230 else if (result) {
3231 Py_RETURN_TRUE;
3232 }
3233 }
3234 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003235 }
Georg Brandl24250812006-06-09 18:45:48 +00003236 result = _string_tailmatch(self, subobj, start, end, +1);
3237 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003238 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003239 else
Georg Brandl24250812006-06-09 18:45:48 +00003240 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003241}
3242
3243
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003244PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003245"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003246\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003247Encodes S using the codec registered for encoding. encoding defaults\n\
3248to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003249handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003250a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3251'xmlcharrefreplace' as well as any other name registered with\n\
3252codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003253
3254static PyObject *
3255string_encode(PyStringObject *self, PyObject *args)
3256{
3257 char *encoding = NULL;
3258 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003259 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003260
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003261 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3262 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003263 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003264 if (v == NULL)
3265 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003266 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3267 PyErr_Format(PyExc_TypeError,
3268 "encoder did not return a string/unicode object "
3269 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003270 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003271 Py_DECREF(v);
3272 return NULL;
3273 }
3274 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003275
3276 onError:
3277 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003278}
3279
3280
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003281PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003282"S.decode([encoding[,errors]]) -> object\n\
3283\n\
3284Decodes S using the codec registered for encoding. encoding defaults\n\
3285to the default encoding. errors may be given to set a different error\n\
3286handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003287a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3288as well as any other name registerd with codecs.register_error that is\n\
3289able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003290
3291static PyObject *
3292string_decode(PyStringObject *self, PyObject *args)
3293{
3294 char *encoding = NULL;
3295 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003296 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003297
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003298 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3299 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003300 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003301 if (v == NULL)
3302 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003303 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3304 PyErr_Format(PyExc_TypeError,
3305 "decoder did not return a string/unicode object "
3306 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003307 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003308 Py_DECREF(v);
3309 return NULL;
3310 }
3311 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003312
3313 onError:
3314 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003315}
3316
3317
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003318PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003319"S.expandtabs([tabsize]) -> string\n\
3320\n\
3321Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003322If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003323
3324static PyObject*
3325string_expandtabs(PyStringObject *self, PyObject *args)
3326{
3327 const char *e, *p;
3328 char *q;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003329 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003330 PyObject *u;
3331 int tabsize = 8;
3332
3333 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3334 return NULL;
3335
Thomas Wouters7e474022000-07-16 12:04:32 +00003336 /* First pass: determine size of output string */
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003337 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003338 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3339 for (p = PyString_AS_STRING(self); p < e; p++)
3340 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003341 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003342 j += tabsize - (j % tabsize);
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003343 if (old_j > j) {
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003344 PyErr_SetString(PyExc_OverflowError,
3345 "new string is too long");
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003346 return NULL;
3347 }
3348 old_j = j;
3349 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003350 }
3351 else {
3352 j++;
3353 if (*p == '\n' || *p == '\r') {
3354 i += j;
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003355 old_j = j = 0;
3356 if (i < 0) {
3357 PyErr_SetString(PyExc_OverflowError,
3358 "new string is too long");
3359 return NULL;
3360 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003361 }
3362 }
3363
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003364 if ((i + j) < 0) {
3365 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3366 return NULL;
3367 }
3368
Guido van Rossum4c08d552000-03-10 22:55:18 +00003369 /* Second pass: create output string and fill it */
3370 u = PyString_FromStringAndSize(NULL, i + j);
3371 if (!u)
3372 return NULL;
3373
3374 j = 0;
3375 q = PyString_AS_STRING(u);
3376
3377 for (p = PyString_AS_STRING(self); p < e; p++)
3378 if (*p == '\t') {
3379 if (tabsize > 0) {
3380 i = tabsize - (j % tabsize);
3381 j += i;
3382 while (i--)
3383 *q++ = ' ';
3384 }
3385 }
3386 else {
3387 j++;
3388 *q++ = *p;
3389 if (*p == '\n' || *p == '\r')
3390 j = 0;
3391 }
3392
3393 return u;
3394}
3395
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003396Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003397pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003398{
3399 PyObject *u;
3400
3401 if (left < 0)
3402 left = 0;
3403 if (right < 0)
3404 right = 0;
3405
Tim Peters8fa5dd02001-09-12 02:18:30 +00003406 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003407 Py_INCREF(self);
3408 return (PyObject *)self;
3409 }
3410
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003411 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412 left + PyString_GET_SIZE(self) + right);
3413 if (u) {
3414 if (left)
3415 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003416 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003417 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003418 PyString_GET_SIZE(self));
3419 if (right)
3420 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3421 fill, right);
3422 }
3423
3424 return u;
3425}
3426
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003427PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003428"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003429"\n"
3430"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003431"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003432
3433static PyObject *
3434string_ljust(PyStringObject *self, PyObject *args)
3435{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003436 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003437 char fillchar = ' ';
3438
Thomas Wouters4abb3662006-04-19 14:50:15 +00003439 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440 return NULL;
3441
Tim Peters8fa5dd02001-09-12 02:18:30 +00003442 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003443 Py_INCREF(self);
3444 return (PyObject*) self;
3445 }
3446
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003447 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003448}
3449
3450
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003451PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003452"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003453"\n"
3454"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003455"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003456
3457static PyObject *
3458string_rjust(PyStringObject *self, PyObject *args)
3459{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003460 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003461 char fillchar = ' ';
3462
Thomas Wouters4abb3662006-04-19 14:50:15 +00003463 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003464 return NULL;
3465
Tim Peters8fa5dd02001-09-12 02:18:30 +00003466 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003467 Py_INCREF(self);
3468 return (PyObject*) self;
3469 }
3470
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003471 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003472}
3473
3474
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003475PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003476"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003477"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003478"Return S centered in a string of length width. Padding is\n"
3479"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003480
3481static PyObject *
3482string_center(PyStringObject *self, PyObject *args)
3483{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003484 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003485 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003486 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003487
Thomas Wouters4abb3662006-04-19 14:50:15 +00003488 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003489 return NULL;
3490
Tim Peters8fa5dd02001-09-12 02:18:30 +00003491 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003492 Py_INCREF(self);
3493 return (PyObject*) self;
3494 }
3495
3496 marg = width - PyString_GET_SIZE(self);
3497 left = marg / 2 + (marg & width & 1);
3498
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003499 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003500}
3501
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003502PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003503"S.zfill(width) -> string\n"
3504"\n"
3505"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003506"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003507
3508static PyObject *
3509string_zfill(PyStringObject *self, PyObject *args)
3510{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003511 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003512 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003513 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003514 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003515
Thomas Wouters4abb3662006-04-19 14:50:15 +00003516 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003517 return NULL;
3518
3519 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003520 if (PyString_CheckExact(self)) {
3521 Py_INCREF(self);
3522 return (PyObject*) self;
3523 }
3524 else
3525 return PyString_FromStringAndSize(
3526 PyString_AS_STRING(self),
3527 PyString_GET_SIZE(self)
3528 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003529 }
3530
3531 fill = width - PyString_GET_SIZE(self);
3532
3533 s = pad(self, fill, 0, '0');
3534
3535 if (s == NULL)
3536 return NULL;
3537
3538 p = PyString_AS_STRING(s);
3539 if (p[fill] == '+' || p[fill] == '-') {
3540 /* move sign to beginning of string */
3541 p[0] = p[fill];
3542 p[fill] = '0';
3543 }
3544
3545 return (PyObject*) s;
3546}
3547
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003548PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003549"S.isspace() -> bool\n\
3550\n\
3551Return True if all characters in S are whitespace\n\
3552and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003553
3554static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003555string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003556{
Fred Drakeba096332000-07-09 07:04:36 +00003557 register const unsigned char *p
3558 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003559 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003560
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561 /* Shortcut for single character strings */
3562 if (PyString_GET_SIZE(self) == 1 &&
3563 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003564 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003565
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003566 /* Special case for empty strings */
3567 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003568 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003569
Guido van Rossum4c08d552000-03-10 22:55:18 +00003570 e = p + PyString_GET_SIZE(self);
3571 for (; p < e; p++) {
3572 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003573 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003574 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003575 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003576}
3577
3578
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003579PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003582Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003583and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003584
3585static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003586string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003587{
Fred Drakeba096332000-07-09 07:04:36 +00003588 register const unsigned char *p
3589 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003590 register const unsigned char *e;
3591
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003592 /* Shortcut for single character strings */
3593 if (PyString_GET_SIZE(self) == 1 &&
3594 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003595 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003596
3597 /* Special case for empty strings */
3598 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003599 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003600
3601 e = p + PyString_GET_SIZE(self);
3602 for (; p < e; p++) {
3603 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003604 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003606 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003607}
3608
3609
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003610PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003612\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003613Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003614and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003615
3616static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003617string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003618{
Fred Drakeba096332000-07-09 07:04:36 +00003619 register const unsigned char *p
3620 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003621 register const unsigned char *e;
3622
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003623 /* Shortcut for single character strings */
3624 if (PyString_GET_SIZE(self) == 1 &&
3625 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003626 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003627
3628 /* Special case for empty strings */
3629 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003630 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003631
3632 e = p + PyString_GET_SIZE(self);
3633 for (; p < e; p++) {
3634 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003635 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003636 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003638}
3639
3640
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003641PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003642"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003644Return True if all characters in S are digits\n\
3645and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646
3647static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003648string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649{
Fred Drakeba096332000-07-09 07:04:36 +00003650 register const unsigned char *p
3651 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003652 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003653
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654 /* Shortcut for single character strings */
3655 if (PyString_GET_SIZE(self) == 1 &&
3656 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003657 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003659 /* Special case for empty strings */
3660 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003662
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663 e = p + PyString_GET_SIZE(self);
3664 for (; p < e; p++) {
3665 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003666 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003668 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669}
3670
3671
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003672PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003673"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003674\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003675Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003676at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677
3678static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003679string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680{
Fred Drakeba096332000-07-09 07:04:36 +00003681 register const unsigned char *p
3682 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003683 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003684 int cased;
3685
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686 /* Shortcut for single character strings */
3687 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003690 /* Special case for empty strings */
3691 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003692 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003693
Guido van Rossum4c08d552000-03-10 22:55:18 +00003694 e = p + PyString_GET_SIZE(self);
3695 cased = 0;
3696 for (; p < e; p++) {
3697 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003698 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699 else if (!cased && islower(*p))
3700 cased = 1;
3701 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003702 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703}
3704
3705
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003706PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003707"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003709Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003710at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711
3712static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003713string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714{
Fred Drakeba096332000-07-09 07:04:36 +00003715 register const unsigned char *p
3716 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003717 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003718 int cased;
3719
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720 /* Shortcut for single character strings */
3721 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003722 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003723
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003724 /* Special case for empty strings */
3725 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003726 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003727
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728 e = p + PyString_GET_SIZE(self);
3729 cased = 0;
3730 for (; p < e; p++) {
3731 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003732 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733 else if (!cased && isupper(*p))
3734 cased = 1;
3735 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003736 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737}
3738
3739
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003740PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003741"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003743Return True if S is a titlecased string and there is at least one\n\
3744character in S, i.e. uppercase characters may only follow uncased\n\
3745characters and lowercase characters only cased ones. Return False\n\
3746otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747
3748static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003749string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750{
Fred Drakeba096332000-07-09 07:04:36 +00003751 register const unsigned char *p
3752 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003753 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003754 int cased, previous_is_cased;
3755
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756 /* Shortcut for single character strings */
3757 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003758 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003759
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003760 /* Special case for empty strings */
3761 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003762 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003763
Guido van Rossum4c08d552000-03-10 22:55:18 +00003764 e = p + PyString_GET_SIZE(self);
3765 cased = 0;
3766 previous_is_cased = 0;
3767 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003768 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769
3770 if (isupper(ch)) {
3771 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003772 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773 previous_is_cased = 1;
3774 cased = 1;
3775 }
3776 else if (islower(ch)) {
3777 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003778 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779 previous_is_cased = 1;
3780 cased = 1;
3781 }
3782 else
3783 previous_is_cased = 0;
3784 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003785 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786}
3787
3788
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003789PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003790"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003791\n\
3792Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003793Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003794is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003795
Guido van Rossum4c08d552000-03-10 22:55:18 +00003796static PyObject*
3797string_splitlines(PyStringObject *self, PyObject *args)
3798{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003799 register Py_ssize_t i;
3800 register Py_ssize_t j;
3801 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003802 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003803 PyObject *list;
3804 PyObject *str;
3805 char *data;
3806
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003807 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808 return NULL;
3809
3810 data = PyString_AS_STRING(self);
3811 len = PyString_GET_SIZE(self);
3812
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003813 /* This does not use the preallocated list because splitlines is
3814 usually run with hundreds of newlines. The overhead of
3815 switching between PyList_SET_ITEM and append causes about a
3816 2-3% slowdown for that common case. A smarter implementation
3817 could move the if check out, so the SET_ITEMs are done first
3818 and the appends only done when the prealloc buffer is full.
3819 That's too much work for little gain.*/
3820
Guido van Rossum4c08d552000-03-10 22:55:18 +00003821 list = PyList_New(0);
3822 if (!list)
3823 goto onError;
3824
3825 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003826 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003827
Guido van Rossum4c08d552000-03-10 22:55:18 +00003828 /* Find a line and append it */
3829 while (i < len && data[i] != '\n' && data[i] != '\r')
3830 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003831
3832 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003833 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003834 if (i < len) {
3835 if (data[i] == '\r' && i + 1 < len &&
3836 data[i+1] == '\n')
3837 i += 2;
3838 else
3839 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003840 if (keepends)
3841 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003842 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003843 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003844 j = i;
3845 }
3846 if (j < len) {
3847 SPLIT_APPEND(data, j, len);
3848 }
3849
3850 return list;
3851
3852 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003853 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003854 return NULL;
3855}
3856
/* Retire the split helper macros so they are not visible to the rest of
   the file. */
#undef PREALLOC_SIZE
#undef MAX_PREALLOC
#undef SPLIT_ADD
#undef SPLIT_APPEND
Guido van Rossum4c08d552000-03-10 22:55:18 +00003861
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003862static PyObject *
3863string_getnewargs(PyStringObject *v)
3864{
Martin v. Löwis68192102007-07-21 06:55:02 +00003865 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003866}
3867
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003868
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003869static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003870string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003871 /* Counterparts of the obsolete stropmodule functions; except
3872 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003873 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3874 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003875 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003876 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3877 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003878 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3879 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3880 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3881 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3882 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3883 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3884 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003885 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3886 capitalize__doc__},
3887 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3888 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3889 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003890 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003891 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3892 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3893 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3894 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3895 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3896 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3897 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003898 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3899 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003900 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3901 startswith__doc__},
3902 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3903 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3904 swapcase__doc__},
3905 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3906 translate__doc__},
3907 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3908 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3909 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3910 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3911 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3912 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3913 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3914 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3915 expandtabs__doc__},
3916 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3917 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003918 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003919 {NULL, NULL} /* sentinel */
3920};
3921
Jeremy Hylton938ace62002-07-17 16:30:39 +00003922static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003923str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3924
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003925static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003926string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003927{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003928 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003929 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003930
Guido van Rossumae960af2001-08-30 03:11:59 +00003931 if (type != &PyString_Type)
3932 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003933 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3934 return NULL;
3935 if (x == NULL)
3936 return PyString_FromString("");
3937 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003938}
3939
Guido van Rossumae960af2001-08-30 03:11:59 +00003940static PyObject *
3941str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3942{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003943 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003944 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003945
3946 assert(PyType_IsSubtype(type, &PyString_Type));
3947 tmp = string_new(&PyString_Type, args, kwds);
3948 if (tmp == NULL)
3949 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003950 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003951 n = PyString_GET_SIZE(tmp);
3952 pnew = type->tp_alloc(type, n);
3953 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003954 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003955 ((PyStringObject *)pnew)->ob_shash =
3956 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003957 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003958 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003959 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003960 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003961}
3962
Guido van Rossumcacfc072002-05-24 19:01:59 +00003963static PyObject *
3964basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3965{
3966 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003967 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003968 return NULL;
3969}
3970
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003971static PyObject *
3972string_mod(PyObject *v, PyObject *w)
3973{
3974 if (!PyString_Check(v)) {
3975 Py_INCREF(Py_NotImplemented);
3976 return Py_NotImplemented;
3977 }
3978 return PyString_Format(v, w);
3979}
3980
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003981PyDoc_STRVAR(basestring_doc,
3982"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003983
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003984static PyNumberMethods string_as_number = {
3985 0, /*nb_add*/
3986 0, /*nb_subtract*/
3987 0, /*nb_multiply*/
3988 0, /*nb_divide*/
3989 string_mod, /*nb_remainder*/
3990};
3991
3992
Guido van Rossumcacfc072002-05-24 19:01:59 +00003993PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00003994 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003995 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003996 0,
3997 0,
3998 0, /* tp_dealloc */
3999 0, /* tp_print */
4000 0, /* tp_getattr */
4001 0, /* tp_setattr */
4002 0, /* tp_compare */
4003 0, /* tp_repr */
4004 0, /* tp_as_number */
4005 0, /* tp_as_sequence */
4006 0, /* tp_as_mapping */
4007 0, /* tp_hash */
4008 0, /* tp_call */
4009 0, /* tp_str */
4010 0, /* tp_getattro */
4011 0, /* tp_setattro */
4012 0, /* tp_as_buffer */
4013 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4014 basestring_doc, /* tp_doc */
4015 0, /* tp_traverse */
4016 0, /* tp_clear */
4017 0, /* tp_richcompare */
4018 0, /* tp_weaklistoffset */
4019 0, /* tp_iter */
4020 0, /* tp_iternext */
4021 0, /* tp_methods */
4022 0, /* tp_members */
4023 0, /* tp_getset */
4024 &PyBaseObject_Type, /* tp_base */
4025 0, /* tp_dict */
4026 0, /* tp_descr_get */
4027 0, /* tp_descr_set */
4028 0, /* tp_dictoffset */
4029 0, /* tp_init */
4030 0, /* tp_alloc */
4031 basestring_new, /* tp_new */
4032 0, /* tp_free */
4033};
4034
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004035PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004036"str(object) -> string\n\
4037\n\
4038Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004039If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004040
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004041PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004042 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004043 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004044 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004045 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004046 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004047 (printfunc)string_print, /* tp_print */
4048 0, /* tp_getattr */
4049 0, /* tp_setattr */
4050 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004051 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004052 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004053 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004054 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004055 (hashfunc)string_hash, /* tp_hash */
4056 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004057 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004058 PyObject_GenericGetAttr, /* tp_getattro */
4059 0, /* tp_setattro */
4060 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004061 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neal Norwitzee3a1b52007-02-25 19:44:48 +00004062 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004063 string_doc, /* tp_doc */
4064 0, /* tp_traverse */
4065 0, /* tp_clear */
4066 (richcmpfunc)string_richcompare, /* tp_richcompare */
4067 0, /* tp_weaklistoffset */
4068 0, /* tp_iter */
4069 0, /* tp_iternext */
4070 string_methods, /* tp_methods */
4071 0, /* tp_members */
4072 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004073 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004074 0, /* tp_dict */
4075 0, /* tp_descr_get */
4076 0, /* tp_descr_set */
4077 0, /* tp_dictoffset */
4078 0, /* tp_init */
4079 0, /* tp_alloc */
4080 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004081 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004082};
4083
4084void
Fred Drakeba096332000-07-09 07:04:36 +00004085PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004086{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004087 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004088 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004089 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004090 if (w == NULL || !PyString_Check(*pv)) {
4091 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004092 *pv = NULL;
4093 return;
4094 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004095 v = string_concat((PyStringObject *) *pv, w);
4096 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004097 *pv = v;
4098}
4099
Guido van Rossum013142a1994-08-30 08:19:36 +00004100void
Fred Drakeba096332000-07-09 07:04:36 +00004101PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004102{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004103 PyString_Concat(pv, w);
4104 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004105}
4106
4107
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004108/* The following function breaks the notion that strings are immutable:
4109 it changes the size of a string. We get away with this only if there
4110 is only one module referencing the object. You can also think of it
4111 as creating a new string object and destroying the old one, only
4112 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004113 already be known to some other part of the code...
4114 Note that if there's not enough memory to resize the string, the original
4115 string object at *pv is deallocated, *pv is set to NULL, an "out of
4116 memory" exception is set, and -1 is returned. Else (on success) 0 is
4117 returned, and the value in *pv may or may not be the same as on input.
4118 As always, an extra byte is allocated for a trailing \0 byte (newsize
4119 does *not* include that), and a trailing \0 byte is stored.
4120*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004121
4122int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004123_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004124{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004125 register PyObject *v;
4126 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004127 v = *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004128 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004129 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004130 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004131 Py_DECREF(v);
4132 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004133 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004134 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004135 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004136 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004137 _Py_ForgetReference(v);
4138 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004139 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004140 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004141 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004142 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004143 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004144 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004145 _Py_NewReference(*pv);
4146 sv = (PyStringObject *) *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004147 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004148 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004149 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004150 return 0;
4151}
Guido van Rossume5372401993-03-16 12:15:04 +00004152
4153/* Helpers for formatstring */
4154
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004155Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004156getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004157{
Thomas Wouters977485d2006-02-16 15:59:12 +00004158 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004159 if (argidx < arglen) {
4160 (*p_argidx)++;
4161 if (arglen < 0)
4162 return args;
4163 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004164 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004165 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004166 PyErr_SetString(PyExc_TypeError,
4167 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004168 return NULL;
4169}
4170
/* Bit flags recording which flag characters appeared in a format
 * specifier; each macro is a distinct bit so they can be OR'ed together.
 */
#define F_LJUST (1<<0)	/* '-' */
#define F_SIGN	(1<<1)	/* '+' */
#define F_BLANK (1<<2)	/* ' ' */
#define F_ALT	(1<<3)	/* '#' */
#define F_ZERO	(1<<4)	/* '0' */
4183
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004184Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004185formatfloat(char *buf, size_t buflen, int flags,
4186 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004187{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004188 /* fmt = '%#.' + `prec` + `type`
4189 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004190 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004191 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004192 x = PyFloat_AsDouble(v);
4193 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004194 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis68192102007-07-21 06:55:02 +00004195 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004196 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004197 }
Guido van Rossume5372401993-03-16 12:15:04 +00004198 if (prec < 0)
4199 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004200 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4201 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004202 /* Worst case length calc to ensure no buffer overrun:
4203
4204 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004205 fmt = %#.<prec>g
4206 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004207 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004208 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004209
4210 'f' formats:
4211 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4212 len = 1 + 50 + 1 + prec = 52 + prec
4213
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004214 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004215 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004216
4217 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004218 if (((type == 'g' || type == 'G') &&
4219 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004220 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004221 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004222 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004223 return -1;
4224 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004225 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4226 (flags&F_ALT) ? "#" : "",
4227 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004228 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004229 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004230}
4231
Tim Peters38fd5b62000-09-21 05:43:11 +00004232/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4233 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4234 * Python's regular ints.
4235 * Return value: a new PyString*, or NULL if error.
4236 * . *pbuf is set to point into it,
4237 * *plen set to the # of chars following that.
4238 * Caller must decref it when done using pbuf.
4239 * The string starting at *pbuf is of the form
4240 * "-"? ("0x" | "0X")? digit+
4241 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004242 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004243 * There will be at least prec digits, zero-filled on the left if
4244 * necessary to get that many.
4245 * val object to be converted
4246 * flags bitmask of format flags; only F_ALT is looked at
4247 * prec minimum number of digits; 0-fill on left if needed
4248 * type a character in [duoxX]; u acts the same as d
4249 *
4250 * CAUTION: o, x and X conversions on regular ints can never
4251 * produce a '-' sign, but can for Python's unbounded ints.
4252 */
4253PyObject*
4254_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4255 char **pbuf, int *plen)
4256{
4257 PyObject *result = NULL;
4258 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004259 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004260 int sign; /* 1 if '-', else 0 */
4261 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004262 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004263 int numdigits; /* len == numnondigits + numdigits */
4264 int numnondigits = 0;
4265
4266 switch (type) {
4267 case 'd':
4268 case 'u':
Martin v. Löwis68192102007-07-21 06:55:02 +00004269 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004270 break;
4271 case 'o':
Martin v. Löwis68192102007-07-21 06:55:02 +00004272 result = Py_Type(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004273 break;
4274 case 'x':
4275 case 'X':
4276 numnondigits = 2;
Martin v. Löwis68192102007-07-21 06:55:02 +00004277 result = Py_Type(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004278 break;
4279 default:
4280 assert(!"'type' not in [duoxX]");
4281 }
4282 if (!result)
4283 return NULL;
4284
Neal Norwitz56423e52006-08-13 18:11:08 +00004285 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004286 if (!buf) {
4287 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004288 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004289 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004290
Tim Peters38fd5b62000-09-21 05:43:11 +00004291 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis68192102007-07-21 06:55:02 +00004292 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004293 PyErr_BadInternalCall();
4294 return NULL;
4295 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004296 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004297 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004298 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4299 return NULL;
4300 }
4301 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004302 if (buf[len-1] == 'L') {
4303 --len;
4304 buf[len] = '\0';
4305 }
4306 sign = buf[0] == '-';
4307 numnondigits += sign;
4308 numdigits = len - numnondigits;
4309 assert(numdigits > 0);
4310
Tim Petersfff53252001-04-12 18:38:48 +00004311 /* Get rid of base marker unless F_ALT */
4312 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004313 /* Need to skip 0x, 0X or 0. */
4314 int skipped = 0;
4315 switch (type) {
4316 case 'o':
4317 assert(buf[sign] == '0');
4318 /* If 0 is only digit, leave it alone. */
4319 if (numdigits > 1) {
4320 skipped = 1;
4321 --numdigits;
4322 }
4323 break;
4324 case 'x':
4325 case 'X':
4326 assert(buf[sign] == '0');
4327 assert(buf[sign + 1] == 'x');
4328 skipped = 2;
4329 numnondigits -= 2;
4330 break;
4331 }
4332 if (skipped) {
4333 buf += skipped;
4334 len -= skipped;
4335 if (sign)
4336 buf[0] = '-';
4337 }
4338 assert(len == numnondigits + numdigits);
4339 assert(numdigits > 0);
4340 }
4341
4342 /* Fill with leading zeroes to meet minimum width. */
4343 if (prec > numdigits) {
4344 PyObject *r1 = PyString_FromStringAndSize(NULL,
4345 numnondigits + prec);
4346 char *b1;
4347 if (!r1) {
4348 Py_DECREF(result);
4349 return NULL;
4350 }
4351 b1 = PyString_AS_STRING(r1);
4352 for (i = 0; i < numnondigits; ++i)
4353 *b1++ = *buf++;
4354 for (i = 0; i < prec - numdigits; i++)
4355 *b1++ = '0';
4356 for (i = 0; i < numdigits; i++)
4357 *b1++ = *buf++;
4358 *b1 = '\0';
4359 Py_DECREF(result);
4360 result = r1;
4361 buf = PyString_AS_STRING(result);
4362 len = numnondigits + prec;
4363 }
4364
4365 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004366 if (type == 'X') {
4367 /* Need to convert all lower case letters to upper case.
4368 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004369 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004370 if (buf[i] >= 'a' && buf[i] <= 'x')
4371 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004372 }
4373 *pbuf = buf;
4374 *plen = len;
4375 return result;
4376}
4377
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004378Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004379formatint(char *buf, size_t buflen, int flags,
4380 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004381{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004382 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004383 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4384 + 1 + 1 = 24 */
4385 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004386 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004387 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004388
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004389 x = PyInt_AsLong(v);
4390 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004391 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00004392 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004393 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004394 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004395 if (x < 0 && type == 'u') {
4396 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004397 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004398 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4399 sign = "-";
4400 else
4401 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004402 if (prec < 0)
4403 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004404
4405 if ((flags & F_ALT) &&
4406 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004407 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004408 * of issues that cause pain:
4409 * - when 0 is being converted, the C standard leaves off
4410 * the '0x' or '0X', which is inconsistent with other
4411 * %#x/%#X conversions and inconsistent with Python's
4412 * hex() function
4413 * - there are platforms that violate the standard and
4414 * convert 0 with the '0x' or '0X'
4415 * (Metrowerks, Compaq Tru64)
4416 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004417 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004418 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004419 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004420 * We can achieve the desired consistency by inserting our
4421 * own '0x' or '0X' prefix, and substituting %x/%X in place
4422 * of %#x/%#X.
4423 *
4424 * Note that this is the same approach as used in
4425 * formatint() in unicodeobject.c
4426 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004427 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4428 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004429 }
4430 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004431 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4432 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004433 prec, type);
4434 }
4435
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004436 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4437 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004438 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004439 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004440 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004441 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004442 return -1;
4443 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004444 if (sign[0])
4445 PyOS_snprintf(buf, buflen, fmt, -x);
4446 else
4447 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004448 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004449}
4450
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004451Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004452formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004453{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004454 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004455 if (PyString_Check(v)) {
4456 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004457 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004458 }
4459 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004460 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004461 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004462 }
4463 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004464 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004465}
4466
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every formatting
   routine bounds-checks against it so nothing overflows, but allocating a
   buffer of the right size per format might be a better solution.  For
   now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004476
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004477PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004478PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004479{
4480 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004481 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004482 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004483 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004484 PyObject *result, *orig_args;
4485#ifdef Py_USING_UNICODE
4486 PyObject *v, *w;
4487#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004488 PyObject *dict = NULL;
4489 if (format == NULL || !PyString_Check(format) || args == NULL) {
4490 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004491 return NULL;
4492 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004493 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004494 fmt = PyString_AS_STRING(format);
4495 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004496 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004497 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004498 if (result == NULL)
4499 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004500 res = PyString_AsString(result);
4501 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004502 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004503 argidx = 0;
4504 }
4505 else {
4506 arglen = -1;
4507 argidx = -2;
4508 }
Martin v. Löwis68192102007-07-21 06:55:02 +00004509 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004510 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004511 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004512 while (--fmtcnt >= 0) {
4513 if (*fmt != '%') {
4514 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004515 rescnt = fmtcnt + 100;
4516 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004517 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004518 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004519 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004521 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004522 }
4523 *res++ = *fmt++;
4524 }
4525 else {
4526 /* Got a format specifier */
4527 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004528 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004529 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004530 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004531 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004532 PyObject *v = NULL;
4533 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004534 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004535 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004536 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004537 char formatbuf[FORMATBUFLEN];
4538 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004539#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004540 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004541 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004542#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004543
Guido van Rossumda9c2711996-12-05 21:58:58 +00004544 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004545 if (*fmt == '(') {
4546 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004547 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004548 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004549 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004550
4551 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004552 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004553 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004554 goto error;
4555 }
4556 ++fmt;
4557 --fmtcnt;
4558 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004559 /* Skip over balanced parentheses */
4560 while (pcount > 0 && --fmtcnt >= 0) {
4561 if (*fmt == ')')
4562 --pcount;
4563 else if (*fmt == '(')
4564 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004565 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004566 }
4567 keylen = fmt - keystart - 1;
4568 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004569 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004570 "incomplete format key");
4571 goto error;
4572 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004573 key = PyString_FromStringAndSize(keystart,
4574 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004575 if (key == NULL)
4576 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004577 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004578 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004579 args_owned = 0;
4580 }
4581 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004582 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004583 if (args == NULL) {
4584 goto error;
4585 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004586 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004587 arglen = -1;
4588 argidx = -2;
4589 }
Guido van Rossume5372401993-03-16 12:15:04 +00004590 while (--fmtcnt >= 0) {
4591 switch (c = *fmt++) {
4592 case '-': flags |= F_LJUST; continue;
4593 case '+': flags |= F_SIGN; continue;
4594 case ' ': flags |= F_BLANK; continue;
4595 case '#': flags |= F_ALT; continue;
4596 case '0': flags |= F_ZERO; continue;
4597 }
4598 break;
4599 }
4600 if (c == '*') {
4601 v = getnextarg(args, arglen, &argidx);
4602 if (v == NULL)
4603 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004604 if (!PyInt_Check(v)) {
4605 PyErr_SetString(PyExc_TypeError,
4606 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004607 goto error;
4608 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004609 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004610 if (width < 0) {
4611 flags |= F_LJUST;
4612 width = -width;
4613 }
Guido van Rossume5372401993-03-16 12:15:04 +00004614 if (--fmtcnt >= 0)
4615 c = *fmt++;
4616 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004617 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004618 width = c - '0';
4619 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004620 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004621 if (!isdigit(c))
4622 break;
4623 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004624 PyErr_SetString(
4625 PyExc_ValueError,
4626 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004627 goto error;
4628 }
4629 width = width*10 + (c - '0');
4630 }
4631 }
4632 if (c == '.') {
4633 prec = 0;
4634 if (--fmtcnt >= 0)
4635 c = *fmt++;
4636 if (c == '*') {
4637 v = getnextarg(args, arglen, &argidx);
4638 if (v == NULL)
4639 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004640 if (!PyInt_Check(v)) {
4641 PyErr_SetString(
4642 PyExc_TypeError,
4643 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004644 goto error;
4645 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004646 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004647 if (prec < 0)
4648 prec = 0;
4649 if (--fmtcnt >= 0)
4650 c = *fmt++;
4651 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004652 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004653 prec = c - '0';
4654 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004655 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004656 if (!isdigit(c))
4657 break;
4658 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004659 PyErr_SetString(
4660 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004661 "prec too big");
4662 goto error;
4663 }
4664 prec = prec*10 + (c - '0');
4665 }
4666 }
4667 } /* prec */
4668 if (fmtcnt >= 0) {
4669 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004670 if (--fmtcnt >= 0)
4671 c = *fmt++;
4672 }
4673 }
4674 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004675 PyErr_SetString(PyExc_ValueError,
4676 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004677 goto error;
4678 }
4679 if (c != '%') {
4680 v = getnextarg(args, arglen, &argidx);
4681 if (v == NULL)
4682 goto error;
4683 }
4684 sign = 0;
4685 fill = ' ';
4686 switch (c) {
4687 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004688 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004689 len = 1;
4690 break;
4691 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004692#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004693 if (PyUnicode_Check(v)) {
4694 fmt = fmt_start;
4695 argidx = argidx_start;
4696 goto unicode;
4697 }
Georg Brandld45014b2005-10-01 17:06:00 +00004698#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004699 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004700#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004701 if (temp != NULL && PyUnicode_Check(temp)) {
4702 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004703 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004704 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004705 goto unicode;
4706 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004707#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004708 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004709 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004710 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004711 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004712 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004713 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004714 if (!PyString_Check(temp)) {
4715 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004716 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004717 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004718 goto error;
4719 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004720 pbuf = PyString_AS_STRING(temp);
4721 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004722 if (prec >= 0 && len > prec)
4723 len = prec;
4724 break;
4725 case 'i':
4726 case 'd':
4727 case 'u':
4728 case 'o':
4729 case 'x':
4730 case 'X':
4731 if (c == 'i')
4732 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004733 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004734 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004735 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004736 prec, c, &pbuf, &ilen);
4737 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004738 if (!temp)
4739 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004740 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004741 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004742 else {
4743 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004744 len = formatint(pbuf,
4745 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004746 flags, prec, c, v);
4747 if (len < 0)
4748 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004749 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004750 }
4751 if (flags & F_ZERO)
4752 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004753 break;
4754 case 'e':
4755 case 'E':
4756 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004757 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004758 case 'g':
4759 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004760 if (c == 'F')
4761 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004762 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004763 len = formatfloat(pbuf, sizeof(formatbuf),
4764 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004765 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004766 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004767 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004768 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004769 fill = '0';
4770 break;
4771 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004772#ifdef Py_USING_UNICODE
4773 if (PyUnicode_Check(v)) {
4774 fmt = fmt_start;
4775 argidx = argidx_start;
4776 goto unicode;
4777 }
4778#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004779 pbuf = formatbuf;
4780 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004781 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004782 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004783 break;
4784 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004785 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004786 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004787 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004788 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004789 (Py_ssize_t)(fmt - 1 -
4790 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004791 goto error;
4792 }
4793 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004794 if (*pbuf == '-' || *pbuf == '+') {
4795 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004796 len--;
4797 }
4798 else if (flags & F_SIGN)
4799 sign = '+';
4800 else if (flags & F_BLANK)
4801 sign = ' ';
4802 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004803 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004804 }
4805 if (width < len)
4806 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004807 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004808 reslen -= rescnt;
4809 rescnt = width + fmtcnt + 100;
4810 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004811 if (reslen < 0) {
4812 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004813 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004814 return PyErr_NoMemory();
4815 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004816 if (_PyString_Resize(&result, reslen) < 0) {
4817 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004818 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004819 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004820 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004821 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004822 }
4823 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004824 if (fill != ' ')
4825 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004826 rescnt--;
4827 if (width > len)
4828 width--;
4829 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004830 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4831 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004832 assert(pbuf[1] == c);
4833 if (fill != ' ') {
4834 *res++ = *pbuf++;
4835 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004836 }
Tim Petersfff53252001-04-12 18:38:48 +00004837 rescnt -= 2;
4838 width -= 2;
4839 if (width < 0)
4840 width = 0;
4841 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004842 }
4843 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004844 do {
4845 --rescnt;
4846 *res++ = fill;
4847 } while (--width > len);
4848 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004849 if (fill == ' ') {
4850 if (sign)
4851 *res++ = sign;
4852 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004853 (c == 'x' || c == 'X')) {
4854 assert(pbuf[0] == '0');
4855 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004856 *res++ = *pbuf++;
4857 *res++ = *pbuf++;
4858 }
4859 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004860 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004861 res += len;
4862 rescnt -= len;
4863 while (--width >= len) {
4864 --rescnt;
4865 *res++ = ' ';
4866 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004867 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004868 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004869 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004870 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004871 goto error;
4872 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004873 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004874 } /* '%' */
4875 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004876 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004877 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004878 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004879 goto error;
4880 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004881 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004882 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004883 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004884 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004885 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004886
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004887#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004888 unicode:
4889 if (args_owned) {
4890 Py_DECREF(args);
4891 args_owned = 0;
4892 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004893 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004894 if (PyTuple_Check(orig_args) && argidx > 0) {
4895 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004896 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004897 v = PyTuple_New(n);
4898 if (v == NULL)
4899 goto error;
4900 while (--n >= 0) {
4901 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4902 Py_INCREF(w);
4903 PyTuple_SET_ITEM(v, n, w);
4904 }
4905 args = v;
4906 } else {
4907 Py_INCREF(orig_args);
4908 args = orig_args;
4909 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004910 args_owned = 1;
4911 /* Take what we have of the result and let the Unicode formatting
4912 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004913 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004914 if (_PyString_Resize(&result, rescnt))
4915 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004916 fmtcnt = PyString_GET_SIZE(format) - \
4917 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004918 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4919 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004920 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004921 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004922 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004923 if (v == NULL)
4924 goto error;
4925 /* Paste what we have (result) to what the Unicode formatting
4926 function returned (v) and return the result (or error) */
4927 w = PyUnicode_Concat(result, v);
4928 Py_DECREF(result);
4929 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004930 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004931 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004932#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004933
Guido van Rossume5372401993-03-16 12:15:04 +00004934 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004935 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004936 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004937 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004938 }
Guido van Rossume5372401993-03-16 12:15:04 +00004939 return NULL;
4940}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004941
Guido van Rossum2a61e741997-01-18 07:55:05 +00004942void
Fred Drakeba096332000-07-09 07:04:36 +00004943PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004944{
4945 register PyStringObject *s = (PyStringObject *)(*p);
4946 PyObject *t;
4947 if (s == NULL || !PyString_Check(s))
4948 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004949 /* If it's a string subclass, we don't really know what putting
4950 it in the interned dict might do. */
4951 if (!PyString_CheckExact(s))
4952 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004953 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004954 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004955 if (interned == NULL) {
4956 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004957 if (interned == NULL) {
4958 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004959 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004960 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004961 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004962 t = PyDict_GetItem(interned, (PyObject *)s);
4963 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004964 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004965 Py_DECREF(*p);
4966 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004967 return;
4968 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004969
Armin Rigo79f7ad22004-08-07 19:27:39 +00004970 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004971 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004972 return;
4973 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004974 /* The two references in interned are not counted by refcnt.
4975 The string deallocator will take care of this */
Martin v. Löwis68192102007-07-21 06:55:02 +00004976 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004977 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004978}
4979
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004980void
4981PyString_InternImmortal(PyObject **p)
4982{
4983 PyString_InternInPlace(p);
4984 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4985 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4986 Py_INCREF(*p);
4987 }
4988}
4989
Guido van Rossum2a61e741997-01-18 07:55:05 +00004990
4991PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004992PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004993{
4994 PyObject *s = PyString_FromString(cp);
4995 if (s == NULL)
4996 return NULL;
4997 PyString_InternInPlace(&s);
4998 return s;
4999}
5000
Guido van Rossum8cf04761997-08-02 02:57:45 +00005001void
Fred Drakeba096332000-07-09 07:04:36 +00005002PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005003{
5004 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005005 for (i = 0; i < UCHAR_MAX + 1; i++) {
5006 Py_XDECREF(characters[i]);
5007 characters[i] = NULL;
5008 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005009 Py_XDECREF(nullstring);
5010 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005011}
Barry Warsawa903ad982001-02-23 16:40:48 +00005012
Barry Warsawa903ad982001-02-23 16:40:48 +00005013void _Py_ReleaseInternedStrings(void)
5014{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005015 PyObject *keys;
5016 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005017 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005018 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005019
5020 if (interned == NULL || !PyDict_Check(interned))
5021 return;
5022 keys = PyDict_Keys(interned);
5023 if (keys == NULL || !PyList_Check(keys)) {
5024 PyErr_Clear();
5025 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005026 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005027
5028 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5029 detector, interned strings are not forcibly deallocated; rather, we
5030 give them their stolen references back, and then clear and DECREF
5031 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005032
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005033 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005034 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5035 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005036 for (i = 0; i < n; i++) {
5037 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5038 switch (s->ob_sstate) {
5039 case SSTATE_NOT_INTERNED:
5040 /* XXX Shouldn't happen */
5041 break;
5042 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005043 Py_Refcnt(s) += 1;
5044 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005045 break;
5046 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005047 Py_Refcnt(s) += 2;
5048 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005049 break;
5050 default:
5051 Py_FatalError("Inconsistent interned string state.");
5052 }
5053 s->ob_sstate = SSTATE_NOT_INTERNED;
5054 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005055 fprintf(stderr, "total size of all interned strings: "
5056 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5057 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005058 Py_DECREF(keys);
5059 PyDict_Clear(interned);
5060 Py_DECREF(interned);
5061 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005062}