blob: ce24154becc8ba7d9ee654594d3a0f77db373684 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000424 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000504 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
Martin v. Löwis68192102007-07-21 06:55:02 +0000524 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000536 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000619 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000620 c = (c<<3) + *s++ - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000621 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000627 if (s+1 < end &&
628 isxdigit(Py_CHARMASK(s[0])) &&
629 isxdigit(Py_CHARMASK(s[1])))
630 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 unsigned int x = 0;
632 c = Py_CHARMASK(*s);
633 s++;
634 if (isdigit(c))
635 x = c - '0';
636 else if (islower(c))
637 x = 10 + c - 'a';
638 else
639 x = 10 + c - 'A';
640 x = x << 4;
641 c = Py_CHARMASK(*s);
642 s++;
643 if (isdigit(c))
644 x += c - '0';
645 else if (islower(c))
646 x += 10 + c - 'a';
647 else
648 x += 10 + c - 'A';
649 *p++ = x;
650 break;
651 }
652 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000653 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657 if (strcmp(errors, "replace") == 0) {
658 *p++ = '?';
659 } else if (strcmp(errors, "ignore") == 0)
660 /* do nothing */;
661 else {
662 PyErr_Format(PyExc_ValueError,
663 "decoding error; "
664 "unknown error handling code: %.400s",
665 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000666 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 }
668#ifndef Py_USING_UNICODE
669 case 'u':
670 case 'U':
671 case 'N':
672 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000673 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000674 "Unicode escapes not legal "
675 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#endif
679 default:
680 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000681 s--;
682 goto non_esc; /* an arbitry number of unescaped
683 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 }
685 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000686 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000687 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000688 return v;
689 failed:
690 Py_DECREF(v);
691 return NULL;
692}
693
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000694/* -------------------------------------------------------------------- */
695/* object api */
696
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698string_getsize(register PyObject *op)
699{
700 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000701 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702 if (PyString_AsStringAndSize(op, &s, &len))
703 return -1;
704 return len;
705}
706
707static /*const*/ char *
708string_getbuffer(register PyObject *op)
709{
710 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000711 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712 if (PyString_AsStringAndSize(op, &s, &len))
713 return NULL;
714 return s;
715}
716
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000718PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000720 if (!PyString_Check(op))
721 return string_getsize(op);
Martin v. Löwis68192102007-07-21 06:55:02 +0000722 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723}
724
725/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000726PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728 if (!PyString_Check(op))
729 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731}
732
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733int
734PyString_AsStringAndSize(register PyObject *obj,
735 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000736 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737{
738 if (s == NULL) {
739 PyErr_BadInternalCall();
740 return -1;
741 }
742
743 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000744#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000745 if (PyUnicode_Check(obj)) {
746 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
747 if (obj == NULL)
748 return -1;
749 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000750 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#endif
752 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000753 PyErr_Format(PyExc_TypeError,
754 "expected string or Unicode object, "
Martin v. Löwis68192102007-07-21 06:55:02 +0000755 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000756 return -1;
757 }
758 }
759
760 *s = PyString_AS_STRING(obj);
761 if (len != NULL)
762 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000763 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000764 PyErr_SetString(PyExc_TypeError,
765 "expected string without null bytes");
766 return -1;
767 }
768 return 0;
769}
770
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000772/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
Fredrik Lundha50d2012006-05-26 17:04:58 +0000774#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000775
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000776#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000777#define STRINGLIB_LEN PyString_GET_SIZE
778#define STRINGLIB_NEW PyString_FromStringAndSize
779#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000780
Fredrik Lundhb9479482006-05-26 17:22:38 +0000781#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
Fredrik Lundha50d2012006-05-26 17:04:58 +0000783#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000785#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000786#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000787#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000788
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790static int
Fred Drakeba096332000-07-09 07:04:36 +0000791string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792{
Brett Cannon01531592007-09-17 03:28:34 +0000793 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000796
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000797 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000798 if (! PyString_CheckExact(op)) {
799 int ret;
800 /* A str subclass may have its own __str__ method. */
801 op = (PyStringObject *) PyObject_Str((PyObject *)op);
802 if (op == NULL)
803 return -1;
804 ret = string_print(op, fp, flags);
805 Py_DECREF(op);
806 return ret;
807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000808 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000809 char *data = op->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +0000810 Py_ssize_t size = Py_Size(op);
Brett Cannon01531592007-09-17 03:28:34 +0000811 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000812 while (size > INT_MAX) {
813 /* Very long strings cannot be written atomically.
814 * But don't write exactly INT_MAX bytes at a time
815 * to avoid memory aligment issues.
816 */
817 const int chunk_size = INT_MAX & ~0x3FFF;
818 fwrite(data, 1, chunk_size, fp);
819 data += chunk_size;
820 size -= chunk_size;
821 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000822#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000823 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000824#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000825 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000826#endif
Brett Cannon01531592007-09-17 03:28:34 +0000827 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000828 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830
Thomas Wouters7e474022000-07-16 12:04:32 +0000831 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 quote = '\'';
Martin v. Löwis68192102007-07-21 06:55:02 +0000833 if (memchr(op->ob_sval, '\'', Py_Size(op)) &&
834 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000835 quote = '"';
836
Brett Cannon01531592007-09-17 03:28:34 +0000837 str_len = Py_Size(op);
838 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000840 for (i = 0; i < str_len; i++) {
841 /* Since strings are immutable and the caller should have a
842 reference, accessing the interal buffer should not be an issue
843 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000845 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000851 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000852 fprintf(fp, "\\r");
853 else if (c < ' ' || c >= 0x7f)
854 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000855 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000856 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000858 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000859 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000860 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000863PyObject *
864PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000866 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis68192102007-07-21 06:55:02 +0000867 size_t newsize = 2 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000868 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +0000869 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000870 PyErr_SetString(PyExc_OverflowError,
871 "string is too large to make repr");
Guido van Rossum9b847b42007-11-06 23:32:56 +0000872 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000873 }
874 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000875 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000876 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 }
878 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000879 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880 register char c;
881 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000882 int quote;
883
Thomas Wouters7e474022000-07-16 12:04:32 +0000884 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000885 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000886 if (smartquotes &&
Martin v. Löwis68192102007-07-21 06:55:02 +0000887 memchr(op->ob_sval, '\'', Py_Size(op)) &&
888 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000889 quote = '"';
890
Tim Peters9161c8b2001-12-03 01:55:38 +0000891 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000892 *p++ = quote;
Martin v. Löwis68192102007-07-21 06:55:02 +0000893 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000894 /* There's at least enough room for a hex escape
895 and a closing quote. */
896 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000898 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000900 else if (c == '\t')
901 *p++ = '\\', *p++ = 't';
902 else if (c == '\n')
903 *p++ = '\\', *p++ = 'n';
904 else if (c == '\r')
905 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000906 else if (c < ' ' || c >= 0x7f) {
907 /* For performance, we don't want to call
908 PyOS_snprintf here (extra layers of
909 function call). */
910 sprintf(p, "\\x%02x", c & 0xff);
911 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000912 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000913 else
914 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000916 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000917 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000918 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000919 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000920 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000921 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923}
924
Guido van Rossum189f1df2001-05-01 16:51:53 +0000925static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000926string_repr(PyObject *op)
927{
928 return PyString_Repr(op, 1);
929}
930
931static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000932string_str(PyObject *s)
933{
Tim Petersc9933152001-10-16 20:18:24 +0000934 assert(PyString_Check(s));
935 if (PyString_CheckExact(s)) {
936 Py_INCREF(s);
937 return s;
938 }
939 else {
940 /* Subtype -- return genuine string with the same value. */
941 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis68192102007-07-21 06:55:02 +0000942 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000943 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000944}
945
Martin v. Löwis18e16552006-02-15 17:27:45 +0000946static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000947string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948{
Martin v. Löwis68192102007-07-21 06:55:02 +0000949 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000950}
951
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000953string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954{
Andrew Dalke598710c2006-05-25 18:18:39 +0000955 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000956 register PyStringObject *op;
957 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000958#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000959 if (PyUnicode_Check(bb))
960 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000961#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000962 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000963 "cannot concatenate 'str' and '%.200s' objects",
Martin v. Löwis68192102007-07-21 06:55:02 +0000964 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 return NULL;
966 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 /* Optimize cases with empty left or right operand */
Martin v. Löwis68192102007-07-21 06:55:02 +0000969 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000970 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis68192102007-07-21 06:55:02 +0000971 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000972 Py_INCREF(bb);
973 return bb;
974 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000975 Py_INCREF(a);
976 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000978 size = Py_Size(a) + Py_Size(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000979 if (size < 0) {
980 PyErr_SetString(PyExc_OverflowError,
981 "strings are too large to concat");
982 return NULL;
983 }
984
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000985 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000986 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000987 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000989 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000990 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000991 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis68192102007-07-21 06:55:02 +0000992 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
993 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000994 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000995 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996#undef b
997}
998
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001000string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001002 register Py_ssize_t i;
1003 register Py_ssize_t j;
1004 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001006 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007 if (n < 0)
1008 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001009 /* watch out for overflows: the size can overflow int,
1010 * and the # of bytes needed can overflow size_t
1011 */
Martin v. Löwis68192102007-07-21 06:55:02 +00001012 size = Py_Size(a) * n;
1013 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001014 PyErr_SetString(PyExc_OverflowError,
1015 "repeated string is too long");
1016 return NULL;
1017 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001018 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 Py_INCREF(a);
1020 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001021 }
Tim Peterse7c05322004-06-27 17:24:49 +00001022 nbytes = (size_t)size;
1023 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001024 PyErr_SetString(PyExc_OverflowError,
1025 "repeated string is too long");
1026 return NULL;
1027 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001028 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001029 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001030 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001031 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001032 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001033 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001034 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001035 op->ob_sval[size] = '\0';
Martin v. Löwis68192102007-07-21 06:55:02 +00001036 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001037 memset(op->ob_sval, a->ob_sval[0] , n);
1038 return (PyObject *) op;
1039 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001040 i = 0;
1041 if (i < size) {
Martin v. Löwis68192102007-07-21 06:55:02 +00001042 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1043 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001044 }
1045 while (i < size) {
1046 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001047 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001048 i += j;
1049 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001050 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051}
1052
1053/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1054
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001056string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001057 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001058 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059{
1060 if (i < 0)
1061 i = 0;
1062 if (j < 0)
1063 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis68192102007-07-21 06:55:02 +00001064 if (j > Py_Size(a))
1065 j = Py_Size(a);
1066 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001067 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001068 Py_INCREF(a);
1069 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070 }
1071 if (j < i)
1072 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001074}
1075
Guido van Rossum9284a572000-03-07 15:53:43 +00001076static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001077string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001078{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001079 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001080#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 if (PyUnicode_Check(sub_obj))
1082 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001083#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001084 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001085 PyErr_Format(PyExc_TypeError,
1086 "'in <string>' requires string as left operand, "
Martin v. Löwis68192102007-07-21 06:55:02 +00001087 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001088 return -1;
1089 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001090 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001091
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001092 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001093}
1094
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001095static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001096string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001098 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +00001100 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001101 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001102 return NULL;
1103 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001104 pchar = a->ob_sval[i];
1105 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001106 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001107 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001108 else {
1109#ifdef COUNT_ALLOCS
1110 one_strings++;
1111#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001112 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001113 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001114 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115}
1116
Martin v. Löwiscd353062001-05-24 16:56:35 +00001117static PyObject*
1118string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001119{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001120 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001121 Py_ssize_t len_a, len_b;
1122 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 PyObject *result;
1124
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001125 /* Make sure both arguments are strings. */
1126 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001127 result = Py_NotImplemented;
1128 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001129 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001130 if (a == b) {
1131 switch (op) {
1132 case Py_EQ:case Py_LE:case Py_GE:
1133 result = Py_True;
1134 goto out;
1135 case Py_NE:case Py_LT:case Py_GT:
1136 result = Py_False;
1137 goto out;
1138 }
1139 }
1140 if (op == Py_EQ) {
1141 /* Supporting Py_NE here as well does not save
1142 much time, since Py_NE is rarely used. */
Martin v. Löwis68192102007-07-21 06:55:02 +00001143 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001144 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis68192102007-07-21 06:55:02 +00001145 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001146 result = Py_True;
1147 } else {
1148 result = Py_False;
1149 }
1150 goto out;
1151 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001152 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001153 min_len = (len_a < len_b) ? len_a : len_b;
1154 if (min_len > 0) {
1155 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1156 if (c==0)
1157 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001158 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001159 c = 0;
1160 if (c == 0)
1161 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1162 switch (op) {
1163 case Py_LT: c = c < 0; break;
1164 case Py_LE: c = c <= 0; break;
1165 case Py_EQ: assert(0); break; /* unreachable */
1166 case Py_NE: c = c != 0; break;
1167 case Py_GT: c = c > 0; break;
1168 case Py_GE: c = c >= 0; break;
1169 default:
1170 result = Py_NotImplemented;
1171 goto out;
1172 }
1173 result = c ? Py_True : Py_False;
1174 out:
1175 Py_INCREF(result);
1176 return result;
1177}
1178
1179int
1180_PyString_Eq(PyObject *o1, PyObject *o2)
1181{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001182 PyStringObject *a = (PyStringObject*) o1;
1183 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis68192102007-07-21 06:55:02 +00001184 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001185 && *a->ob_sval == *b->ob_sval
Martin v. Löwis68192102007-07-21 06:55:02 +00001186 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001187}
1188
Guido van Rossum9bfef441993-03-29 10:43:31 +00001189static long
Fred Drakeba096332000-07-09 07:04:36 +00001190string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001191{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001192 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001193 register unsigned char *p;
1194 register long x;
1195
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001196 if (a->ob_shash != -1)
1197 return a->ob_shash;
Martin v. Löwis68192102007-07-21 06:55:02 +00001198 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001199 p = (unsigned char *) a->ob_sval;
1200 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001201 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001202 x = (1000003*x) ^ *p++;
Martin v. Löwis68192102007-07-21 06:55:02 +00001203 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001204 if (x == -1)
1205 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001206 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001207 return x;
1208}
1209
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001210static PyObject*
1211string_subscript(PyStringObject* self, PyObject* item)
1212{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001213 if (PyIndex_Check(item)) {
1214 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001215 if (i == -1 && PyErr_Occurred())
1216 return NULL;
1217 if (i < 0)
1218 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001219 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 }
1221 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001222 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001223 char* source_buf;
1224 char* result_buf;
1225 PyObject* result;
1226
Tim Petersae1d0c92006-03-17 03:29:34 +00001227 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 PyString_GET_SIZE(self),
1229 &start, &stop, &step, &slicelength) < 0) {
1230 return NULL;
1231 }
1232
1233 if (slicelength <= 0) {
1234 return PyString_FromStringAndSize("", 0);
1235 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001236 else if (start == 0 && step == 1 &&
1237 slicelength == PyString_GET_SIZE(self) &&
1238 PyString_CheckExact(self)) {
1239 Py_INCREF(self);
1240 return (PyObject *)self;
1241 }
1242 else if (step == 1) {
1243 return PyString_FromStringAndSize(
1244 PyString_AS_STRING(self) + start,
1245 slicelength);
1246 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001247 else {
1248 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001249 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001250 if (result_buf == NULL)
1251 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001252
Tim Petersae1d0c92006-03-17 03:29:34 +00001253 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001254 cur += step, i++) {
1255 result_buf[i] = source_buf[cur];
1256 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001257
1258 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001259 slicelength);
1260 PyMem_Free(result_buf);
1261 return result;
1262 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001263 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001264 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001265 PyErr_Format(PyExc_TypeError,
1266 "string indices must be integers, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00001267 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001268 return NULL;
1269 }
1270}
1271
Martin v. Löwis18e16552006-02-15 17:27:45 +00001272static Py_ssize_t
1273string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274{
1275 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001276 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001277 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001278 return -1;
1279 }
1280 *ptr = (void *)self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001281 return Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001282}
1283
Martin v. Löwis18e16552006-02-15 17:27:45 +00001284static Py_ssize_t
1285string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001286{
Guido van Rossum045e6881997-09-08 18:30:11 +00001287 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001288 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001289 return -1;
1290}
1291
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292static Py_ssize_t
1293string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001294{
1295 if ( lenp )
Martin v. Löwis68192102007-07-21 06:55:02 +00001296 *lenp = Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297 return 1;
1298}
1299
Martin v. Löwis18e16552006-02-15 17:27:45 +00001300static Py_ssize_t
1301string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001302{
1303 if ( index != 0 ) {
1304 PyErr_SetString(PyExc_SystemError,
1305 "accessing non-existent string segment");
1306 return -1;
1307 }
1308 *ptr = self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001309 return Py_Size(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001310}
1311
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001312static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001313 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001314 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001315 (ssizeargfunc)string_repeat, /*sq_repeat*/
1316 (ssizeargfunc)string_item, /*sq_item*/
1317 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001318 0, /*sq_ass_item*/
1319 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001320 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001321};
1322
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001323static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001324 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001325 (binaryfunc)string_subscript,
1326 0,
1327};
1328
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001329static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001330 (readbufferproc)string_buffer_getreadbuf,
1331 (writebufferproc)string_buffer_getwritebuf,
1332 (segcountproc)string_buffer_getsegcount,
1333 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001334};
1335
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336
1337
/* Selector for the three strip variants. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for a selector: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001346
Andrew Dalke525eab32006-05-26 14:00:45 +00001347
/* True iff the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly and the bytes in between with memcmp().

   Don't call if length < 2: the middle comparison is given length-2.

   Every parameter is parenthesized in the expansion so that compound
   argument expressions (e.g. `a ? b : c`, `p + 1`) bind as intended.
   Arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&    \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1353
1354
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit`
   splits: maxsplit+1 (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The parameter is parenthesized so compound argument expressions bind
   as intended; it is evaluated more than once. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1367
/* Append the substring data[left:right] to `list`.

   Relies on names from the expanding function: a PyObject* `str`, the
   target `list`, and an `onError` label that is jumped to on failure.
   The temporary reference in `str` is released on both paths.

   Note: this expands to several statements, not a single one. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);
1379
/* Add the substring data[left:right] as result element number `count`.

   While count is below MAX_PREALLOC the new string is stored directly
   into slot `count` of `list` with PyList_SET_ITEM; after that it is
   appended with PyList_Append and the temporary reference is released.
   `count` is incremented on success.

   Relies on names from the expanding function: `str`, `list`, `count`
   and an `onError` label that is jumped to on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001396
/* Always force the list to the expected size: set the size field of
   `list` to the number of elements added, taken from the expanding
   function's `count` variable. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Andrew Dalke525eab32006-05-26 14:00:45 +00001399
/* Whitespace scanners used by the split routines.  Each one moves the
   index `i` in place for as long as the byte s[i] passes the test.  The
   forward scanners stop at `len`; the reverse (R*) scanners stop once `i`
   has dropped below 0.  The byte is passed through Py_CHARMASK before it
   is handed to isspace(). */
#define SKIP_SPACE(s, i, len) \
    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) \
    { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i) \
    { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i) \
    { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1404
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001405Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001406split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407{
Andrew Dalke525eab32006-05-26 14:00:45 +00001408 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001409 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001410 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411
1412 if (list == NULL)
1413 return NULL;
1414
Andrew Dalke02758d62006-05-26 15:21:01 +00001415 i = j = 0;
1416
1417 while (maxsplit-- > 0) {
1418 SKIP_SPACE(s, i, len);
1419 if (i==len) break;
1420 j = i; i++;
1421 SKIP_NONSPACE(s, i, len);
1422 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001424
1425 if (i < len) {
1426 /* Only occurs when maxsplit was reached */
1427 /* Skip any remaining whitespace and copy to end of string */
1428 SKIP_SPACE(s, i, len);
1429 if (i != len)
1430 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001431 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001432 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001433 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001434 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435 Py_DECREF(list);
1436 return NULL;
1437}
1438
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001439Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001440split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001441{
Andrew Dalke525eab32006-05-26 14:00:45 +00001442 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001443 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001444 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001445
1446 if (list == NULL)
1447 return NULL;
1448
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001449 i = j = 0;
1450 while ((j < len) && (maxcount-- > 0)) {
1451 for(; j<len; j++) {
1452 /* I found that using memchr makes no difference */
1453 if (s[j] == ch) {
1454 SPLIT_ADD(s, i, j);
1455 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001456 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001457 }
1458 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001459 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001460 if (i <= len) {
1461 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001462 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001463 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001464 return list;
1465
1466 onError:
1467 Py_DECREF(list);
1468 return NULL;
1469}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001471PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001472"S.split([sep [,maxsplit]]) -> list of strings\n\
1473\n\
1474Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001475delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001476splits are done. If sep is not specified or is None, any\n\
1477whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478
1479static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001480string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001482 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001483 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001484 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001485 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001486#ifdef USE_FAST
1487 Py_ssize_t pos;
1488#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489
Martin v. Löwis9c830762006-04-13 08:37:17 +00001490 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001492 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001493 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001494 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001496 if (PyString_Check(subobj)) {
1497 sub = PyString_AS_STRING(subobj);
1498 n = PyString_GET_SIZE(subobj);
1499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001500#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001501 else if (PyUnicode_Check(subobj))
1502 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001503#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001504 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1505 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001506
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 if (n == 0) {
1508 PyErr_SetString(PyExc_ValueError, "empty separator");
1509 return NULL;
1510 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001511 else if (n == 1)
1512 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001513
Andrew Dalke525eab32006-05-26 14:00:45 +00001514 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515 if (list == NULL)
1516 return NULL;
1517
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001518#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001520 while (maxsplit-- > 0) {
1521 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1522 if (pos < 0)
1523 break;
1524 j = i+pos;
1525 SPLIT_ADD(s, i, j);
1526 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001528#else
1529 i = j = 0;
1530 while ((j+n <= len) && (maxsplit-- > 0)) {
1531 for (; j+n <= len; j++) {
1532 if (Py_STRING_MATCH(s, j, sub, n)) {
1533 SPLIT_ADD(s, i, j);
1534 i = j = j + n;
1535 break;
1536 }
1537 }
1538 }
1539#endif
1540 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001541 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542 return list;
1543
Andrew Dalke525eab32006-05-26 14:00:45 +00001544 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545 Py_DECREF(list);
1546 return NULL;
1547}
1548
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549PyDoc_STRVAR(partition__doc__,
1550"S.partition(sep) -> (head, sep, tail)\n\
1551\n\
1552Searches for the separator sep in S, and returns the part before it,\n\
1553the separator itself, and the part after it. If the separator is not\n\
1554found, returns S and two empty strings.");
1555
1556static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001557string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001558{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001559 const char *sep;
1560 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001561
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001562 if (PyString_Check(sep_obj)) {
1563 sep = PyString_AS_STRING(sep_obj);
1564 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001565 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001566#ifdef Py_USING_UNICODE
1567 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001568 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001569#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001570 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001571 return NULL;
1572
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001573 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001574 (PyObject*) self,
1575 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1576 sep_obj, sep, sep_len
1577 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001578}
1579
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001580PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001581"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001582\n\
1583Searches for the separator sep in S, starting at the end of S, and returns\n\
1584the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001585separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001586
1587static PyObject *
1588string_rpartition(PyStringObject *self, PyObject *sep_obj)
1589{
1590 const char *sep;
1591 Py_ssize_t sep_len;
1592
1593 if (PyString_Check(sep_obj)) {
1594 sep = PyString_AS_STRING(sep_obj);
1595 sep_len = PyString_GET_SIZE(sep_obj);
1596 }
1597#ifdef Py_USING_UNICODE
1598 else if (PyUnicode_Check(sep_obj))
1599 return PyUnicode_Partition((PyObject *) self, sep_obj);
1600#endif
1601 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1602 return NULL;
1603
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001604 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001605 (PyObject*) self,
1606 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1607 sep_obj, sep, sep_len
1608 );
1609}
1610
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001611Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001612rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001613{
Andrew Dalke525eab32006-05-26 14:00:45 +00001614 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001615 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001616 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001617
1618 if (list == NULL)
1619 return NULL;
1620
Andrew Dalke02758d62006-05-26 15:21:01 +00001621 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001622
Andrew Dalke02758d62006-05-26 15:21:01 +00001623 while (maxsplit-- > 0) {
1624 RSKIP_SPACE(s, i);
1625 if (i<0) break;
1626 j = i; i--;
1627 RSKIP_NONSPACE(s, i);
1628 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001629 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001630 if (i >= 0) {
1631 /* Only occurs when maxsplit was reached */
1632 /* Skip any remaining whitespace and copy to beginning of string */
1633 RSKIP_SPACE(s, i);
1634 if (i >= 0)
1635 SPLIT_ADD(s, 0, i + 1);
1636
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001637 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001638 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001639 if (PyList_Reverse(list) < 0)
1640 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001641 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001642 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001643 Py_DECREF(list);
1644 return NULL;
1645}
1646
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001647Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001648rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001649{
Andrew Dalke525eab32006-05-26 14:00:45 +00001650 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001651 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001652 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001653
1654 if (list == NULL)
1655 return NULL;
1656
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001657 i = j = len - 1;
1658 while ((i >= 0) && (maxcount-- > 0)) {
1659 for (; i >= 0; i--) {
1660 if (s[i] == ch) {
1661 SPLIT_ADD(s, i + 1, j + 1);
1662 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001663 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001664 }
1665 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001666 }
1667 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001668 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001669 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001670 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001671 if (PyList_Reverse(list) < 0)
1672 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001673 return list;
1674
1675 onError:
1676 Py_DECREF(list);
1677 return NULL;
1678}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679
1680PyDoc_STRVAR(rsplit__doc__,
1681"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1682\n\
1683Return a list of the words in the string S, using sep as the\n\
1684delimiter string, starting at the end of the string and working\n\
1685to the front. If maxsplit is given, at most maxsplit splits are\n\
1686done. If sep is not specified or is None, any whitespace string\n\
1687is a separator.");
1688
1689static PyObject *
1690string_rsplit(PyStringObject *self, PyObject *args)
1691{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001692 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001693 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001694 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001695 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001696
Martin v. Löwis9c830762006-04-13 08:37:17 +00001697 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001698 return NULL;
1699 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001700 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001701 if (subobj == Py_None)
1702 return rsplit_whitespace(s, len, maxsplit);
1703 if (PyString_Check(subobj)) {
1704 sub = PyString_AS_STRING(subobj);
1705 n = PyString_GET_SIZE(subobj);
1706 }
1707#ifdef Py_USING_UNICODE
1708 else if (PyUnicode_Check(subobj))
1709 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1710#endif
1711 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1712 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001713
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 if (n == 0) {
1715 PyErr_SetString(PyExc_ValueError, "empty separator");
1716 return NULL;
1717 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001718 else if (n == 1)
1719 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001720
Andrew Dalke525eab32006-05-26 14:00:45 +00001721 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 if (list == NULL)
1723 return NULL;
1724
1725 j = len;
1726 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001727
1728 while ( (i >= 0) && (maxsplit-- > 0) ) {
1729 for (; i>=0; i--) {
1730 if (Py_STRING_MATCH(s, i, sub, n)) {
1731 SPLIT_ADD(s, i + n, j);
1732 j = i;
1733 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001734 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001735 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001736 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001737 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001738 SPLIT_ADD(s, 0, j);
1739 FIX_PREALLOC_SIZE(list);
1740 if (PyList_Reverse(list) < 0)
1741 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001742 return list;
1743
Andrew Dalke525eab32006-05-26 14:00:45 +00001744onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001745 Py_DECREF(list);
1746 return NULL;
1747}
1748
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001750PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751"S.join(sequence) -> string\n\
1752\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001754sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755
1756static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001757string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758{
1759 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001760 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001763 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001764 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001765 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001766 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767
Tim Peters19fe14e2001-01-19 03:03:47 +00001768 seq = PySequence_Fast(orig, "");
1769 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001770 return NULL;
1771 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001772
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001773 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 if (seqlen == 0) {
1775 Py_DECREF(seq);
1776 return PyString_FromString("");
1777 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001779 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001780 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1781 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001782 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001783 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001784 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001786
Raymond Hettinger674f2412004-08-23 23:23:54 +00001787 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001788 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001789 * Do a pre-pass to figure out the total amount of space we'll
1790 * need (sz), see whether any argument is absurd, and defer to
1791 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001792 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001793 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001795 item = PySequence_Fast_GET_ITEM(seq, i);
1796 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001797#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001798 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001799 /* Defer to Unicode join.
1800 * CAUTION: There's no gurantee that the
1801 * original sequence can be iterated over
1802 * again, so we must pass seq here.
1803 */
1804 PyObject *result;
1805 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001806 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001807 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001808 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001809#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001810 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001811 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001812 " %.80s found",
Martin v. Löwis68192102007-07-21 06:55:02 +00001813 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001814 Py_DECREF(seq);
1815 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001816 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001817 sz += PyString_GET_SIZE(item);
1818 if (i != 0)
1819 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001820 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001821 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001822 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001823 Py_DECREF(seq);
1824 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001826 }
1827
1828 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001829 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001830 if (res == NULL) {
1831 Py_DECREF(seq);
1832 return NULL;
1833 }
1834
1835 /* Catenate everything. */
1836 p = PyString_AS_STRING(res);
1837 for (i = 0; i < seqlen; ++i) {
1838 size_t n;
1839 item = PySequence_Fast_GET_ITEM(seq, i);
1840 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001841 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001842 p += n;
1843 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001844 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001845 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001846 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001848
Jeremy Hylton49048292000-07-11 03:28:17 +00001849 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851}
1852
Tim Peters52e155e2001-06-16 05:42:57 +00001853PyObject *
1854_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001855{
Tim Petersa7259592001-06-16 05:11:17 +00001856 assert(sep != NULL && PyString_Check(sep));
1857 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001858 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001859}
1860
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001861Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001862string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001863{
1864 if (*end > len)
1865 *end = len;
1866 else if (*end < 0)
1867 *end += len;
1868 if (*end < 0)
1869 *end = 0;
1870 if (*start < 0)
1871 *start += len;
1872 if (*start < 0)
1873 *start = 0;
1874}
1875
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001876Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001877string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001880 const char *sub;
1881 Py_ssize_t sub_len;
1882 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001884 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1885 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001886 return -2;
1887 if (PyString_Check(subobj)) {
1888 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001889 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001890 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001891#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001892 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001893 return PyUnicode_Find(
1894 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001895#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001896 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001897 /* XXX - the "expected a character buffer object" is pretty
1898 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001899 return -2;
1900
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001901 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001902 return stringlib_find_slice(
1903 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1904 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001905 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001906 return stringlib_rfind_slice(
1907 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1908 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909}
1910
1911
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001912PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913"S.find(sub [,start [,end]]) -> int\n\
1914\n\
1915Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001916such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917arguments start and end are interpreted as in slice notation.\n\
1918\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001919Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920
1921static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001922string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001924 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 if (result == -2)
1926 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001927 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928}
1929
1930
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001931PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932"S.index(sub [,start [,end]]) -> int\n\
1933\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001934Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935
1936static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001937string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001939 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940 if (result == -2)
1941 return NULL;
1942 if (result == -1) {
1943 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001944 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945 return NULL;
1946 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001947 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948}
1949
1950
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001951PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952"S.rfind(sub [,start [,end]]) -> int\n\
1953\n\
1954Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001955such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956arguments start and end are interpreted as in slice notation.\n\
1957\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001958Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959
1960static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001961string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001963 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964 if (result == -2)
1965 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001966 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967}
1968
1969
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001970PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971"S.rindex(sub [,start [,end]]) -> int\n\
1972\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001973Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974
1975static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001976string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001978 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979 if (result == -2)
1980 return NULL;
1981 if (result == -1) {
1982 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001983 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984 return NULL;
1985 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001986 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987}
1988
1989
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001990Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001991do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1992{
1993 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001994 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001995 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001996 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1997 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001998
1999 i = 0;
2000 if (striptype != RIGHTSTRIP) {
2001 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2002 i++;
2003 }
2004 }
2005
2006 j = len;
2007 if (striptype != LEFTSTRIP) {
2008 do {
2009 j--;
2010 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2011 j++;
2012 }
2013
2014 if (i == 0 && j == len && PyString_CheckExact(self)) {
2015 Py_INCREF(self);
2016 return (PyObject*)self;
2017 }
2018 else
2019 return PyString_FromStringAndSize(s+i, j-i);
2020}
2021
2022
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002023Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002024do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025{
2026 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002027 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029 i = 0;
2030 if (striptype != RIGHTSTRIP) {
2031 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2032 i++;
2033 }
2034 }
2035
2036 j = len;
2037 if (striptype != LEFTSTRIP) {
2038 do {
2039 j--;
2040 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2041 j++;
2042 }
2043
Tim Peters8fa5dd02001-09-12 02:18:30 +00002044 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045 Py_INCREF(self);
2046 return (PyObject*)self;
2047 }
2048 else
2049 return PyString_FromStringAndSize(s+i, j-i);
2050}
2051
2052
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002053Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002054do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2055{
2056 PyObject *sep = NULL;
2057
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002058 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002059 return NULL;
2060
2061 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002062 if (PyString_Check(sep))
2063 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002064#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002065 else if (PyUnicode_Check(sep)) {
2066 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2067 PyObject *res;
2068 if (uniself==NULL)
2069 return NULL;
2070 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2071 striptype, sep);
2072 Py_DECREF(uniself);
2073 return res;
2074 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002075#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002076 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002077#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002078 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002079#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002080 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002081#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002082 STRIPNAME(striptype));
2083 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002084 }
2085
2086 return do_strip(self, striptype);
2087}
2088
2089
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002090PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002091"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092\n\
2093Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002094whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002095If chars is given and not None, remove characters in chars instead.\n\
2096If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097
2098static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002099string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002101 if (PyTuple_GET_SIZE(args) == 0)
2102 return do_strip(self, BOTHSTRIP); /* Common case */
2103 else
2104 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105}
2106
2107
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002108PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002109"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002111Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002112If chars is given and not None, remove characters in chars instead.\n\
2113If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114
2115static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002116string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002118 if (PyTuple_GET_SIZE(args) == 0)
2119 return do_strip(self, LEFTSTRIP); /* Common case */
2120 else
2121 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002122}
2123
2124
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002125PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002126"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002128Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002129If chars is given and not None, remove characters in chars instead.\n\
2130If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131
2132static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002133string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002135 if (PyTuple_GET_SIZE(args) == 0)
2136 return do_strip(self, RIGHTSTRIP); /* Common case */
2137 else
2138 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139}
2140
2141
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002142PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143"S.lower() -> string\n\
2144\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002145Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002147/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2148#ifndef _tolower
2149#define _tolower tolower
2150#endif
2151
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002153string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002155 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002156 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002157 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002159 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002160 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002162
2163 s = PyString_AS_STRING(newobj);
2164
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002165 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002166
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002168 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002169 if (isupper(c))
2170 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002172
Anthony Baxtera6286212006-04-11 07:42:36 +00002173 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174}
2175
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002176PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177"S.upper() -> string\n\
2178\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002179Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002181#ifndef _toupper
2182#define _toupper toupper
2183#endif
2184
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002186string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002188 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002189 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002190 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002192 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002193 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002195
2196 s = PyString_AS_STRING(newobj);
2197
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002198 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002199
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002200 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002201 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002202 if (islower(c))
2203 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002205
Anthony Baxtera6286212006-04-11 07:42:36 +00002206 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207}
2208
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002209PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002210"S.title() -> string\n\
2211\n\
2212Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002213characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002214
2215static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002216string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002217{
2218 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002219 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002220 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002221 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222
Anthony Baxtera6286212006-04-11 07:42:36 +00002223 newobj = PyString_FromStringAndSize(NULL, n);
2224 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002225 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002226 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002227 for (i = 0; i < n; i++) {
2228 int c = Py_CHARMASK(*s++);
2229 if (islower(c)) {
2230 if (!previous_is_cased)
2231 c = toupper(c);
2232 previous_is_cased = 1;
2233 } else if (isupper(c)) {
2234 if (previous_is_cased)
2235 c = tolower(c);
2236 previous_is_cased = 1;
2237 } else
2238 previous_is_cased = 0;
2239 *s_new++ = c;
2240 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002241 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002242}
2243
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002244PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245"S.capitalize() -> string\n\
2246\n\
2247Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002248capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002249
2250static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002251string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252{
2253 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002254 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002255 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256
Anthony Baxtera6286212006-04-11 07:42:36 +00002257 newobj = PyString_FromStringAndSize(NULL, n);
2258 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002260 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261 if (0 < n) {
2262 int c = Py_CHARMASK(*s++);
2263 if (islower(c))
2264 *s_new = toupper(c);
2265 else
2266 *s_new = c;
2267 s_new++;
2268 }
2269 for (i = 1; i < n; i++) {
2270 int c = Py_CHARMASK(*s++);
2271 if (isupper(c))
2272 *s_new = tolower(c);
2273 else
2274 *s_new = c;
2275 s_new++;
2276 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002277 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278}
2279
2280
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002281PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002282"S.count(sub[, start[, end]]) -> int\n\
2283\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002284Return the number of non-overlapping occurrences of substring sub in\n\
2285string S[start:end]. Optional arguments start and end are interpreted\n\
2286as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287
2288static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002289string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002291 PyObject *sub_obj;
2292 const char *str = PyString_AS_STRING(self), *sub;
2293 Py_ssize_t sub_len;
2294 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002295
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002296 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2297 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002299
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002300 if (PyString_Check(sub_obj)) {
2301 sub = PyString_AS_STRING(sub_obj);
2302 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002304#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002305 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002306 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002307 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002308 if (count == -1)
2309 return NULL;
2310 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002311 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002312 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002313#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002314 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002315 return NULL;
2316
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002317 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002318
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002319 return PyInt_FromSsize_t(
2320 stringlib_count(str + start, end - start, sub, sub_len)
2321 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002322}
2323
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002324PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325"S.swapcase() -> string\n\
2326\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002328converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002329
2330static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002331string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332{
2333 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002334 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002335 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336
Anthony Baxtera6286212006-04-11 07:42:36 +00002337 newobj = PyString_FromStringAndSize(NULL, n);
2338 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002340 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002341 for (i = 0; i < n; i++) {
2342 int c = Py_CHARMASK(*s++);
2343 if (islower(c)) {
2344 *s_new = toupper(c);
2345 }
2346 else if (isupper(c)) {
2347 *s_new = tolower(c);
2348 }
2349 else
2350 *s_new = c;
2351 s_new++;
2352 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002353 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002354}
2355
2356
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002357PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358"S.translate(table [,deletechars]) -> string\n\
2359\n\
2360Return a copy of the string S, where all characters occurring\n\
2361in the optional argument deletechars are removed, and the\n\
2362remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002363translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364
2365static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002366string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002367{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002369 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002370 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002371 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002372 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002373 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374 PyObject *result;
2375 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002376 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002378 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381
2382 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002383 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384 tablen = PyString_GET_SIZE(tableobj);
2385 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002386 else if (tableobj == Py_None) {
2387 table = NULL;
2388 tablen = 256;
2389 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002390#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002391 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002392 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393 parameter; instead a mapping to None will cause characters
2394 to be deleted. */
2395 if (delobj != NULL) {
2396 PyErr_SetString(PyExc_TypeError,
2397 "deletions are implemented differently for unicode");
2398 return NULL;
2399 }
2400 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002402#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002403 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002405
Martin v. Löwis00b61272002-12-12 20:03:19 +00002406 if (tablen != 256) {
2407 PyErr_SetString(PyExc_ValueError,
2408 "translation table must be 256 characters long");
2409 return NULL;
2410 }
2411
Guido van Rossum4c08d552000-03-10 22:55:18 +00002412 if (delobj != NULL) {
2413 if (PyString_Check(delobj)) {
2414 del_table = PyString_AS_STRING(delobj);
2415 dellen = PyString_GET_SIZE(delobj);
2416 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002417#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002418 else if (PyUnicode_Check(delobj)) {
2419 PyErr_SetString(PyExc_TypeError,
2420 "deletions are implemented differently for unicode");
2421 return NULL;
2422 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002423#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002424 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2425 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002426 }
2427 else {
2428 del_table = NULL;
2429 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430 }
2431
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002432 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433 result = PyString_FromStringAndSize((char *)NULL, inlen);
2434 if (result == NULL)
2435 return NULL;
2436 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002437 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002439 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002440 /* If no deletions are required, use faster code */
2441 for (i = inlen; --i >= 0; ) {
2442 c = Py_CHARMASK(*input++);
2443 if (Py_CHARMASK((*output++ = table[c])) != c)
2444 changed = 1;
2445 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002446 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002447 return result;
2448 Py_DECREF(result);
2449 Py_INCREF(input_obj);
2450 return input_obj;
2451 }
2452
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002453 if (table == NULL) {
2454 for (i = 0; i < 256; i++)
2455 trans_table[i] = Py_CHARMASK(i);
2456 } else {
2457 for (i = 0; i < 256; i++)
2458 trans_table[i] = Py_CHARMASK(table[i]);
2459 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002460
2461 for (i = 0; i < dellen; i++)
2462 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2463
2464 for (i = inlen; --i >= 0; ) {
2465 c = Py_CHARMASK(*input++);
2466 if (trans_table[c] != -1)
2467 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2468 continue;
2469 changed = 1;
2470 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002471 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472 Py_DECREF(result);
2473 Py_INCREF(input_obj);
2474 return input_obj;
2475 }
2476 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002477 if (inlen > 0)
2478 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002479 return result;
2480}
2481
2482
/* Search direction values: findstring() and countstring() scan backwards
   when given a negative direction and forwards otherwise. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Thin wrapper over memchr() that yields a char * instead of void *:
   address of the first byte equal to c within the first target_len bytes
   of target, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2490
2491/* String ops must return a string. */
2492/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002493Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002494return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002495{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002496 if (PyString_CheckExact(self)) {
2497 Py_INCREF(self);
2498 return self;
2499 }
2500 return (PyStringObject *)PyString_FromStringAndSize(
2501 PyString_AS_STRING(self),
2502 PyString_GET_SIZE(self));
2503}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002504
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002505Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002506countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002507{
2508 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002509 const char *start=target;
2510 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002511
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002512 while ( (start=findchar(start, end-start, c)) != NULL ) {
2513 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002514 if (count >= maxcount)
2515 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002516 start += 1;
2517 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002518 return count;
2519}
2520
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002521Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002522findstring(const char *target, Py_ssize_t target_len,
2523 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002524 Py_ssize_t start,
2525 Py_ssize_t end,
2526 int direction)
2527{
2528 if (start < 0) {
2529 start += target_len;
2530 if (start < 0)
2531 start = 0;
2532 }
2533 if (end > target_len) {
2534 end = target_len;
2535 } else if (end < 0) {
2536 end += target_len;
2537 if (end < 0)
2538 end = 0;
2539 }
2540
2541 /* zero-length substrings always match at the first attempt */
2542 if (pattern_len == 0)
2543 return (direction > 0) ? start : end;
2544
2545 end -= pattern_len;
2546
2547 if (direction < 0) {
2548 for (; end >= start; end--)
2549 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2550 return end;
2551 } else {
2552 for (; start <= end; start++)
2553 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2554 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002555 }
2556 return -1;
2557}
2558
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002559Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002560countstring(const char *target, Py_ssize_t target_len,
2561 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002562 Py_ssize_t start,
2563 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002564 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002565{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002566 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002567
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002568 if (start < 0) {
2569 start += target_len;
2570 if (start < 0)
2571 start = 0;
2572 }
2573 if (end > target_len) {
2574 end = target_len;
2575 } else if (end < 0) {
2576 end += target_len;
2577 if (end < 0)
2578 end = 0;
2579 }
2580
2581 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002582 if (pattern_len == 0 || maxcount == 0) {
2583 if (target_len+1 < maxcount)
2584 return target_len+1;
2585 return maxcount;
2586 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002587
2588 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002589 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002590 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2592 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002593 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002594 end -= pattern_len-1;
2595 }
2596 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002597 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002598 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2599 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002600 if (--maxcount <= 0)
2601 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002602 start += pattern_len-1;
2603 }
2604 }
2605 return count;
2606}
2607
2608
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002609/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002610
2611/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002612Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002613replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002614 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002615 Py_ssize_t maxcount)
2616{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002617 char *self_s, *result_s;
2618 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002619 Py_ssize_t count, i, product;
2620 PyStringObject *result;
2621
2622 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002623
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002624 /* 1 at the end plus 1 after every character */
2625 count = self_len+1;
2626 if (maxcount < count)
2627 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002628
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002629 /* Check for overflow */
2630 /* result_len = count * to_len + self_len; */
2631 product = count * to_len;
2632 if (product / to_len != count) {
2633 PyErr_SetString(PyExc_OverflowError,
2634 "replace string is too long");
2635 return NULL;
2636 }
2637 result_len = product + self_len;
2638 if (result_len < 0) {
2639 PyErr_SetString(PyExc_OverflowError,
2640 "replace string is too long");
2641 return NULL;
2642 }
2643
2644 if (! (result = (PyStringObject *)
2645 PyString_FromStringAndSize(NULL, result_len)) )
2646 return NULL;
2647
2648 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002649 result_s = PyString_AS_STRING(result);
2650
2651 /* TODO: special case single character, which doesn't need memcpy */
2652
2653 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002654 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002655 result_s += to_len;
2656 count -= 1;
2657
2658 for (i=0; i<count; i++) {
2659 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002660 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002661 result_s += to_len;
2662 }
2663
2664 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002665 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002666
2667 return result;
2668}
2669
2670/* Special case for deleting a single character */
2671/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002672Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002673replace_delete_single_character(PyStringObject *self,
2674 char from_c, Py_ssize_t maxcount)
2675{
2676 char *self_s, *result_s;
2677 char *start, *next, *end;
2678 Py_ssize_t self_len, result_len;
2679 Py_ssize_t count;
2680 PyStringObject *result;
2681
2682 self_len = PyString_GET_SIZE(self);
2683 self_s = PyString_AS_STRING(self);
2684
Andrew Dalke51324072006-05-26 20:25:22 +00002685 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002686 if (count == 0) {
2687 return return_self(self);
2688 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002689
2690 result_len = self_len - count; /* from_len == 1 */
2691 assert(result_len>=0);
2692
2693 if ( (result = (PyStringObject *)
2694 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2695 return NULL;
2696 result_s = PyString_AS_STRING(result);
2697
2698 start = self_s;
2699 end = self_s + self_len;
2700 while (count-- > 0) {
2701 next = findchar(start, end-start, from_c);
2702 if (next == NULL)
2703 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002704 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002705 result_s += (next-start);
2706 start = next+1;
2707 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002708 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002709
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002710 return result;
2711}
2712
2713/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2714
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002715Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002716replace_delete_substring(PyStringObject *self,
2717 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002718 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002719 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002720 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002721 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002722 Py_ssize_t count, offset;
2723 PyStringObject *result;
2724
2725 self_len = PyString_GET_SIZE(self);
2726 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002727
2728 count = countstring(self_s, self_len,
2729 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002730 0, self_len, 1,
2731 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002732
2733 if (count == 0) {
2734 /* no matches */
2735 return return_self(self);
2736 }
2737
2738 result_len = self_len - (count * from_len);
2739 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002740
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002741 if ( (result = (PyStringObject *)
2742 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2743 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002744
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002745 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002746
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002747 start = self_s;
2748 end = self_s + self_len;
2749 while (count-- > 0) {
2750 offset = findstring(start, end-start,
2751 from_s, from_len,
2752 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002753 if (offset == -1)
2754 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002755 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002756
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002757 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002758
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002759 result_s += (next-start);
2760 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002761 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002762 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002763 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002764}
2765
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002766/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002767Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002768replace_single_character_in_place(PyStringObject *self,
2769 char from_c, char to_c,
2770 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002771{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002772 char *self_s, *result_s, *start, *end, *next;
2773 Py_ssize_t self_len;
2774 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002775
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002776 /* The result string will be the same size */
2777 self_s = PyString_AS_STRING(self);
2778 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002779
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002780 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002781
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002782 if (next == NULL) {
2783 /* No matches; return the original string */
2784 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002785 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002786
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002787 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002788 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002789 if (result == NULL)
2790 return NULL;
2791 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002792 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002793
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002794 /* change everything in-place, starting with this one */
2795 start = result_s + (next-self_s);
2796 *start = to_c;
2797 start++;
2798 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002799
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800 while (--maxcount > 0) {
2801 next = findchar(start, end-start, from_c);
2802 if (next == NULL)
2803 break;
2804 *next = to_c;
2805 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002806 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002807
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002809}
2810
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002811/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002812Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002813replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002814 const char *from_s, Py_ssize_t from_len,
2815 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002816 Py_ssize_t maxcount)
2817{
2818 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002819 char *self_s;
2820 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002821 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002822
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002823 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002824
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002825 self_s = PyString_AS_STRING(self);
2826 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002827
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002828 offset = findstring(self_s, self_len,
2829 from_s, from_len,
2830 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002831 if (offset == -1) {
2832 /* No matches; return the original string */
2833 return return_self(self);
2834 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002835
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002837 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002838 if (result == NULL)
2839 return NULL;
2840 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002841 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002842
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002843 /* change everything in-place, starting with this one */
2844 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002845 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002846 start += from_len;
2847 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002848
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002849 while ( --maxcount > 0) {
2850 offset = findstring(start, end-start,
2851 from_s, from_len,
2852 0, end-start, FORWARD);
2853 if (offset==-1)
2854 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002855 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002856 start += offset+from_len;
2857 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002858
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002859 return result;
2860}
2861
2862/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002863Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002864replace_single_character(PyStringObject *self,
2865 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002866 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867 Py_ssize_t maxcount)
2868{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002869 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002870 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002871 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002872 Py_ssize_t count, product;
2873 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002874
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002875 self_s = PyString_AS_STRING(self);
2876 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002877
Andrew Dalke51324072006-05-26 20:25:22 +00002878 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002879 if (count == 0) {
2880 /* no matches, return unchanged */
2881 return return_self(self);
2882 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002883
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002884 /* use the difference between current and new, hence the "-1" */
2885 /* result_len = self_len + count * (to_len-1) */
2886 product = count * (to_len-1);
2887 if (product / (to_len-1) != count) {
2888 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2889 return NULL;
2890 }
2891 result_len = self_len + product;
2892 if (result_len < 0) {
2893 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2894 return NULL;
2895 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002896
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002897 if ( (result = (PyStringObject *)
2898 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2899 return NULL;
2900 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002901
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002902 start = self_s;
2903 end = self_s + self_len;
2904 while (count-- > 0) {
2905 next = findchar(start, end-start, from_c);
2906 if (next == NULL)
2907 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002908
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002909 if (next == start) {
2910 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002911 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002912 result_s += to_len;
2913 start += 1;
2914 } else {
2915 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002916 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002917 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002918 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002919 result_s += to_len;
2920 start = next+1;
2921 }
2922 }
2923 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002924 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002925
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002926 return result;
2927}
2928
2929/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002930Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002931replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002932 const char *from_s, Py_ssize_t from_len,
2933 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002934 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002935 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002936 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002937 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002938 Py_ssize_t count, offset, product;
2939 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002940
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002941 self_s = PyString_AS_STRING(self);
2942 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002943
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002944 count = countstring(self_s, self_len,
2945 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002946 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002947 if (count == 0) {
2948 /* no matches, return unchanged */
2949 return return_self(self);
2950 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002951
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002952 /* Check for overflow */
2953 /* result_len = self_len + count * (to_len-from_len) */
2954 product = count * (to_len-from_len);
2955 if (product / (to_len-from_len) != count) {
2956 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2957 return NULL;
2958 }
2959 result_len = self_len + product;
2960 if (result_len < 0) {
2961 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2962 return NULL;
2963 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002964
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002965 if ( (result = (PyStringObject *)
2966 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2967 return NULL;
2968 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002969
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002970 start = self_s;
2971 end = self_s + self_len;
2972 while (count-- > 0) {
2973 offset = findstring(start, end-start,
2974 from_s, from_len,
2975 0, end-start, FORWARD);
2976 if (offset == -1)
2977 break;
2978 next = start+offset;
2979 if (next == start) {
2980 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002981 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002982 result_s += to_len;
2983 start += from_len;
2984 } else {
2985 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002986 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002987 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002988 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002989 result_s += to_len;
2990 start = next+from_len;
2991 }
2992 }
2993 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002994 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002995
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002996 return result;
2997}
2998
2999
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003000Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003001replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003002 const char *from_s, Py_ssize_t from_len,
3003 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003004 Py_ssize_t maxcount)
3005{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003006 if (maxcount < 0) {
3007 maxcount = PY_SSIZE_T_MAX;
3008 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3009 /* nothing to do; return the original string */
3010 return return_self(self);
3011 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003012
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003013 if (maxcount == 0 ||
3014 (from_len == 0 && to_len == 0)) {
3015 /* nothing to do; return the original string */
3016 return return_self(self);
3017 }
3018
3019 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003020
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003021 if (from_len == 0) {
3022 /* insert the 'to' string everywhere. */
3023 /* >>> "Python".replace("", ".") */
3024 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003025 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003026 }
3027
3028 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3029 /* point for an empty self string to generate a non-empty string */
3030 /* Special case so the remaining code always gets a non-empty string */
3031 if (PyString_GET_SIZE(self) == 0) {
3032 return return_self(self);
3033 }
3034
3035 if (to_len == 0) {
3036 /* delete all occurances of 'from' string */
3037 if (from_len == 1) {
3038 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003039 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003040 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003041 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003042 }
3043 }
3044
3045 /* Handle special case where both strings have the same length */
3046
3047 if (from_len == to_len) {
3048 if (from_len == 1) {
3049 return replace_single_character_in_place(
3050 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003051 from_s[0],
3052 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003053 maxcount);
3054 } else {
3055 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003056 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057 }
3058 }
3059
3060 /* Otherwise use the more generic algorithms */
3061 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003062 return replace_single_character(self, from_s[0],
3063 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003064 } else {
3065 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003066 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003067 }
3068}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003069
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003070PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003071"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003072\n\
3073Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003074old replaced by new. If the optional argument count is\n\
3075given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003076
3077static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003078string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003079{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003080 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003081 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003082 const char *from_s, *to_s;
3083 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003084
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003085 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003086 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003087
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003088 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003089 from_s = PyString_AS_STRING(from);
3090 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003091 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003092#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003093 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003094 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003095 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003096#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003097 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003098 return NULL;
3099
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003100 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003101 to_s = PyString_AS_STRING(to);
3102 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003103 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003104#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003105 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003106 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003107 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003108#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003109 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003110 return NULL;
3111
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003112 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003113 from_s, from_len,
3114 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003115}
3116
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003117/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003118
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003119/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003120 * against substr, using the start and end arguments. Returns
3121 * -1 on error, 0 if not found and 1 if found.
3122 */
3123Py_LOCAL(int)
3124_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3125 Py_ssize_t end, int direction)
3126{
3127 Py_ssize_t len = PyString_GET_SIZE(self);
3128 Py_ssize_t slen;
3129 const char* sub;
3130 const char* str;
3131
3132 if (PyString_Check(substr)) {
3133 sub = PyString_AS_STRING(substr);
3134 slen = PyString_GET_SIZE(substr);
3135 }
3136#ifdef Py_USING_UNICODE
3137 else if (PyUnicode_Check(substr))
3138 return PyUnicode_Tailmatch((PyObject *)self,
3139 substr, start, end, direction);
3140#endif
3141 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3142 return -1;
3143 str = PyString_AS_STRING(self);
3144
3145 string_adjust_indices(&start, &end, len);
3146
3147 if (direction < 0) {
3148 /* startswith */
3149 if (start+slen > len)
3150 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003151 } else {
3152 /* endswith */
3153 if (end-start < slen || start > len)
3154 return 0;
3155
3156 if (end-slen > start)
3157 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003158 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003159 if (end-start >= slen)
3160 return ! memcmp(str+start, sub, slen);
3161 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003162}
3163
3164
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003165PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003166"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003167\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003168Return True if S starts with the specified prefix, False otherwise.\n\
3169With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003170With optional end, stop comparing S at that position.\n\
3171prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003172
3173static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003174string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003175{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003176 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003177 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003178 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003179 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003180
Guido van Rossumc6821402000-05-08 14:08:05 +00003181 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3182 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003183 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003184 if (PyTuple_Check(subobj)) {
3185 Py_ssize_t i;
3186 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3187 result = _string_tailmatch(self,
3188 PyTuple_GET_ITEM(subobj, i),
3189 start, end, -1);
3190 if (result == -1)
3191 return NULL;
3192 else if (result) {
3193 Py_RETURN_TRUE;
3194 }
3195 }
3196 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003197 }
Georg Brandl24250812006-06-09 18:45:48 +00003198 result = _string_tailmatch(self, subobj, start, end, -1);
3199 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003200 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003201 else
Georg Brandl24250812006-06-09 18:45:48 +00003202 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003203}
3204
3205
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003206PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003207"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003208\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003209Return True if S ends with the specified suffix, False otherwise.\n\
3210With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003211With optional end, stop comparing S at that position.\n\
3212suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213
3214static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003215string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003217 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003218 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003219 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003220 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003221
Guido van Rossumc6821402000-05-08 14:08:05 +00003222 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3223 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003224 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003225 if (PyTuple_Check(subobj)) {
3226 Py_ssize_t i;
3227 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3228 result = _string_tailmatch(self,
3229 PyTuple_GET_ITEM(subobj, i),
3230 start, end, +1);
3231 if (result == -1)
3232 return NULL;
3233 else if (result) {
3234 Py_RETURN_TRUE;
3235 }
3236 }
3237 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003238 }
Georg Brandl24250812006-06-09 18:45:48 +00003239 result = _string_tailmatch(self, subobj, start, end, +1);
3240 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003241 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003242 else
Georg Brandl24250812006-06-09 18:45:48 +00003243 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003244}
3245
3246
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003247PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003248"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003249\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003250Encodes S using the codec registered for encoding. encoding defaults\n\
3251to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003252handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003253a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3254'xmlcharrefreplace' as well as any other name registered with\n\
3255codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003256
3257static PyObject *
3258string_encode(PyStringObject *self, PyObject *args)
3259{
3260 char *encoding = NULL;
3261 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003262 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003263
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003264 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3265 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003266 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003267 if (v == NULL)
3268 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003269 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3270 PyErr_Format(PyExc_TypeError,
3271 "encoder did not return a string/unicode object "
3272 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003273 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003274 Py_DECREF(v);
3275 return NULL;
3276 }
3277 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003278
3279 onError:
3280 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003281}
3282
3283
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003284PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003285"S.decode([encoding[,errors]]) -> object\n\
3286\n\
3287Decodes S using the codec registered for encoding. encoding defaults\n\
3288to the default encoding. errors may be given to set a different error\n\
3289handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003290a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3291as well as any other name registerd with codecs.register_error that is\n\
3292able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003293
3294static PyObject *
3295string_decode(PyStringObject *self, PyObject *args)
3296{
3297 char *encoding = NULL;
3298 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003299 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003300
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003301 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3302 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003303 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003304 if (v == NULL)
3305 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003306 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3307 PyErr_Format(PyExc_TypeError,
3308 "decoder did not return a string/unicode object "
3309 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003310 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003311 Py_DECREF(v);
3312 return NULL;
3313 }
3314 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003315
3316 onError:
3317 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003318}
3319
3320
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003321PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003322"S.expandtabs([tabsize]) -> string\n\
3323\n\
3324Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003325If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003326
3327static PyObject*
3328string_expandtabs(PyStringObject *self, PyObject *args)
3329{
3330 const char *e, *p;
3331 char *q;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003332 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003333 PyObject *u;
3334 int tabsize = 8;
3335
3336 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3337 return NULL;
3338
Thomas Wouters7e474022000-07-16 12:04:32 +00003339 /* First pass: determine size of output string */
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003340 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003341 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3342 for (p = PyString_AS_STRING(self); p < e; p++)
3343 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003344 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003345 j += tabsize - (j % tabsize);
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003346 if (old_j > j) {
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003347 PyErr_SetString(PyExc_OverflowError,
3348 "new string is too long");
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003349 return NULL;
3350 }
3351 old_j = j;
3352 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003353 }
3354 else {
3355 j++;
3356 if (*p == '\n' || *p == '\r') {
3357 i += j;
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003358 old_j = j = 0;
3359 if (i < 0) {
3360 PyErr_SetString(PyExc_OverflowError,
3361 "new string is too long");
3362 return NULL;
3363 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003364 }
3365 }
3366
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003367 if ((i + j) < 0) {
3368 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3369 return NULL;
3370 }
3371
Guido van Rossum4c08d552000-03-10 22:55:18 +00003372 /* Second pass: create output string and fill it */
3373 u = PyString_FromStringAndSize(NULL, i + j);
3374 if (!u)
3375 return NULL;
3376
3377 j = 0;
3378 q = PyString_AS_STRING(u);
3379
3380 for (p = PyString_AS_STRING(self); p < e; p++)
3381 if (*p == '\t') {
3382 if (tabsize > 0) {
3383 i = tabsize - (j % tabsize);
3384 j += i;
3385 while (i--)
3386 *q++ = ' ';
3387 }
3388 }
3389 else {
3390 j++;
3391 *q++ = *p;
3392 if (*p == '\n' || *p == '\r')
3393 j = 0;
3394 }
3395
3396 return u;
3397}
3398
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003399Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003400pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003401{
3402 PyObject *u;
3403
3404 if (left < 0)
3405 left = 0;
3406 if (right < 0)
3407 right = 0;
3408
Tim Peters8fa5dd02001-09-12 02:18:30 +00003409 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003410 Py_INCREF(self);
3411 return (PyObject *)self;
3412 }
3413
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003414 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003415 left + PyString_GET_SIZE(self) + right);
3416 if (u) {
3417 if (left)
3418 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003419 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003420 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003421 PyString_GET_SIZE(self));
3422 if (right)
3423 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3424 fill, right);
3425 }
3426
3427 return u;
3428}
3429
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003430PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003431"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003432"\n"
3433"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003434"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003435
3436static PyObject *
3437string_ljust(PyStringObject *self, PyObject *args)
3438{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003439 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003440 char fillchar = ' ';
3441
Thomas Wouters4abb3662006-04-19 14:50:15 +00003442 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003443 return NULL;
3444
Tim Peters8fa5dd02001-09-12 02:18:30 +00003445 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003446 Py_INCREF(self);
3447 return (PyObject*) self;
3448 }
3449
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003450 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003451}
3452
3453
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003454PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003455"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003456"\n"
3457"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003458"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003459
3460static PyObject *
3461string_rjust(PyStringObject *self, PyObject *args)
3462{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003463 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003464 char fillchar = ' ';
3465
Thomas Wouters4abb3662006-04-19 14:50:15 +00003466 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003467 return NULL;
3468
Tim Peters8fa5dd02001-09-12 02:18:30 +00003469 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003470 Py_INCREF(self);
3471 return (PyObject*) self;
3472 }
3473
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003474 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003475}
3476
3477
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003478PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003479"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003480"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003481"Return S centered in a string of length width. Padding is\n"
3482"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003483
3484static PyObject *
3485string_center(PyStringObject *self, PyObject *args)
3486{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003487 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003488 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003489 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003490
Thomas Wouters4abb3662006-04-19 14:50:15 +00003491 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003492 return NULL;
3493
Tim Peters8fa5dd02001-09-12 02:18:30 +00003494 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003495 Py_INCREF(self);
3496 return (PyObject*) self;
3497 }
3498
3499 marg = width - PyString_GET_SIZE(self);
3500 left = marg / 2 + (marg & width & 1);
3501
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003502 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003503}
3504
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003505PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003506"S.zfill(width) -> string\n"
3507"\n"
3508"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003509"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003510
3511static PyObject *
3512string_zfill(PyStringObject *self, PyObject *args)
3513{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003514 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003515 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003516 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003517 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003518
Thomas Wouters4abb3662006-04-19 14:50:15 +00003519 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003520 return NULL;
3521
3522 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003523 if (PyString_CheckExact(self)) {
3524 Py_INCREF(self);
3525 return (PyObject*) self;
3526 }
3527 else
3528 return PyString_FromStringAndSize(
3529 PyString_AS_STRING(self),
3530 PyString_GET_SIZE(self)
3531 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003532 }
3533
3534 fill = width - PyString_GET_SIZE(self);
3535
3536 s = pad(self, fill, 0, '0');
3537
3538 if (s == NULL)
3539 return NULL;
3540
3541 p = PyString_AS_STRING(s);
3542 if (p[fill] == '+' || p[fill] == '-') {
3543 /* move sign to beginning of string */
3544 p[0] = p[fill];
3545 p[fill] = '0';
3546 }
3547
3548 return (PyObject*) s;
3549}
3550
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003551PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003552"S.isspace() -> bool\n\
3553\n\
3554Return True if all characters in S are whitespace\n\
3555and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003556
3557static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003558string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003559{
Fred Drakeba096332000-07-09 07:04:36 +00003560 register const unsigned char *p
3561 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003562 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003563
Guido van Rossum4c08d552000-03-10 22:55:18 +00003564 /* Shortcut for single character strings */
3565 if (PyString_GET_SIZE(self) == 1 &&
3566 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003567 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003568
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003569 /* Special case for empty strings */
3570 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003571 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003572
Guido van Rossum4c08d552000-03-10 22:55:18 +00003573 e = p + PyString_GET_SIZE(self);
3574 for (; p < e; p++) {
3575 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003576 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003577 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003578 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003579}
3580
3581
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003582PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003583"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003584\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003585Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003586and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003587
3588static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003589string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003590{
Fred Drakeba096332000-07-09 07:04:36 +00003591 register const unsigned char *p
3592 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003593 register const unsigned char *e;
3594
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003595 /* Shortcut for single character strings */
3596 if (PyString_GET_SIZE(self) == 1 &&
3597 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003598 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003599
3600 /* Special case for empty strings */
3601 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003602 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003603
3604 e = p + PyString_GET_SIZE(self);
3605 for (; p < e; p++) {
3606 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003607 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003608 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003609 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003610}
3611
3612
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003613PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003614"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003615\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003616Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003617and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003618
3619static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003620string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003621{
Fred Drakeba096332000-07-09 07:04:36 +00003622 register const unsigned char *p
3623 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003624 register const unsigned char *e;
3625
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003626 /* Shortcut for single character strings */
3627 if (PyString_GET_SIZE(self) == 1 &&
3628 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003629 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003630
3631 /* Special case for empty strings */
3632 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003633 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003634
3635 e = p + PyString_GET_SIZE(self);
3636 for (; p < e; p++) {
3637 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003638 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003639 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003640 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003641}
3642
3643
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003644PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003645"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003647Return True if all characters in S are digits\n\
3648and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649
3650static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003651string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003652{
Fred Drakeba096332000-07-09 07:04:36 +00003653 register const unsigned char *p
3654 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003655 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657 /* Shortcut for single character strings */
3658 if (PyString_GET_SIZE(self) == 1 &&
3659 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003660 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003662 /* Special case for empty strings */
3663 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003664 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003665
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666 e = p + PyString_GET_SIZE(self);
3667 for (; p < e; p++) {
3668 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003669 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003671 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672}
3673
3674
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003675PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003678Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003679at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680
3681static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003682string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683{
Fred Drakeba096332000-07-09 07:04:36 +00003684 register const unsigned char *p
3685 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003686 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687 int cased;
3688
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689 /* Shortcut for single character strings */
3690 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003691 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003693 /* Special case for empty strings */
3694 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003695 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003696
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697 e = p + PyString_GET_SIZE(self);
3698 cased = 0;
3699 for (; p < e; p++) {
3700 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702 else if (!cased && islower(*p))
3703 cased = 1;
3704 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003705 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003706}
3707
3708
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003709PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003710"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003712Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003713at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714
3715static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003716string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717{
Fred Drakeba096332000-07-09 07:04:36 +00003718 register const unsigned char *p
3719 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003720 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721 int cased;
3722
Guido van Rossum4c08d552000-03-10 22:55:18 +00003723 /* Shortcut for single character strings */
3724 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003725 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003727 /* Special case for empty strings */
3728 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003729 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003730
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731 e = p + PyString_GET_SIZE(self);
3732 cased = 0;
3733 for (; p < e; p++) {
3734 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003735 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736 else if (!cased && isupper(*p))
3737 cased = 1;
3738 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003739 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740}
3741
3742
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003743PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003744"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003745\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003746Return True if S is a titlecased string and there is at least one\n\
3747character in S, i.e. uppercase characters may only follow uncased\n\
3748characters and lowercase characters only cased ones. Return False\n\
3749otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750
3751static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003752string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753{
Fred Drakeba096332000-07-09 07:04:36 +00003754 register const unsigned char *p
3755 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003756 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757 int cased, previous_is_cased;
3758
Guido van Rossum4c08d552000-03-10 22:55:18 +00003759 /* Shortcut for single character strings */
3760 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003761 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003762
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003763 /* Special case for empty strings */
3764 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003765 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003766
Guido van Rossum4c08d552000-03-10 22:55:18 +00003767 e = p + PyString_GET_SIZE(self);
3768 cased = 0;
3769 previous_is_cased = 0;
3770 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003771 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772
3773 if (isupper(ch)) {
3774 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003775 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003776 previous_is_cased = 1;
3777 cased = 1;
3778 }
3779 else if (islower(ch)) {
3780 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003781 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782 previous_is_cased = 1;
3783 cased = 1;
3784 }
3785 else
3786 previous_is_cased = 0;
3787 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003788 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789}
3790
3791
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003792PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003793"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003794\n\
3795Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003796Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003797is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003798
Guido van Rossum4c08d552000-03-10 22:55:18 +00003799static PyObject*
3800string_splitlines(PyStringObject *self, PyObject *args)
3801{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003802 register Py_ssize_t i;
3803 register Py_ssize_t j;
3804 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003805 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003806 PyObject *list;
3807 PyObject *str;
3808 char *data;
3809
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003810 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003811 return NULL;
3812
3813 data = PyString_AS_STRING(self);
3814 len = PyString_GET_SIZE(self);
3815
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003816 /* This does not use the preallocated list because splitlines is
3817 usually run with hundreds of newlines. The overhead of
3818 switching between PyList_SET_ITEM and append causes about a
3819 2-3% slowdown for that common case. A smarter implementation
3820 could move the if check out, so the SET_ITEMs are done first
3821 and the appends only done when the prealloc buffer is full.
3822 That's too much work for little gain.*/
3823
Guido van Rossum4c08d552000-03-10 22:55:18 +00003824 list = PyList_New(0);
3825 if (!list)
3826 goto onError;
3827
3828 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003829 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003830
Guido van Rossum4c08d552000-03-10 22:55:18 +00003831 /* Find a line and append it */
3832 while (i < len && data[i] != '\n' && data[i] != '\r')
3833 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003834
3835 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003836 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003837 if (i < len) {
3838 if (data[i] == '\r' && i + 1 < len &&
3839 data[i+1] == '\n')
3840 i += 2;
3841 else
3842 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003843 if (keepends)
3844 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003845 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003846 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003847 j = i;
3848 }
3849 if (j < len) {
3850 SPLIT_APPEND(data, j, len);
3851 }
3852
3853 return list;
3854
3855 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003856 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003857 return NULL;
3858}
3859
3860#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003861#undef SPLIT_ADD
3862#undef MAX_PREALLOC
3863#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003864
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003865static PyObject *
3866string_getnewargs(PyStringObject *v)
3867{
Martin v. Löwis68192102007-07-21 06:55:02 +00003868 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003869}
3870
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003871
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003872static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003873string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003874 /* Counterparts of the obsolete stropmodule functions; except
3875 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003876 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3877 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003878 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003879 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3880 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003881 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3882 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3883 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3884 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3885 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3886 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3887 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003888 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3889 capitalize__doc__},
3890 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3891 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3892 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003893 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003894 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3895 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3896 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3897 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3898 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3899 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3900 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003901 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3902 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003903 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3904 startswith__doc__},
3905 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3906 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3907 swapcase__doc__},
3908 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3909 translate__doc__},
3910 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3911 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3912 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3913 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3914 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3915 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3916 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3917 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3918 expandtabs__doc__},
3919 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3920 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003921 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003922 {NULL, NULL} /* sentinel */
3923};
3924
Jeremy Hylton938ace62002-07-17 16:30:39 +00003925static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003926str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3927
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003928static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003929string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003930{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003931 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003932 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003933
Guido van Rossumae960af2001-08-30 03:11:59 +00003934 if (type != &PyString_Type)
3935 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003936 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3937 return NULL;
3938 if (x == NULL)
3939 return PyString_FromString("");
3940 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003941}
3942
Guido van Rossumae960af2001-08-30 03:11:59 +00003943static PyObject *
3944str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3945{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003946 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003947 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003948
3949 assert(PyType_IsSubtype(type, &PyString_Type));
3950 tmp = string_new(&PyString_Type, args, kwds);
3951 if (tmp == NULL)
3952 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003953 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003954 n = PyString_GET_SIZE(tmp);
3955 pnew = type->tp_alloc(type, n);
3956 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003957 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003958 ((PyStringObject *)pnew)->ob_shash =
3959 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003960 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003961 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003962 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003963 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003964}
3965
Guido van Rossumcacfc072002-05-24 19:01:59 +00003966static PyObject *
3967basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3968{
3969 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003970 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003971 return NULL;
3972}
3973
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003974static PyObject *
3975string_mod(PyObject *v, PyObject *w)
3976{
3977 if (!PyString_Check(v)) {
3978 Py_INCREF(Py_NotImplemented);
3979 return Py_NotImplemented;
3980 }
3981 return PyString_Format(v, w);
3982}
3983
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003984PyDoc_STRVAR(basestring_doc,
3985"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003986
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003987static PyNumberMethods string_as_number = {
3988 0, /*nb_add*/
3989 0, /*nb_subtract*/
3990 0, /*nb_multiply*/
3991 0, /*nb_divide*/
3992 string_mod, /*nb_remainder*/
3993};
3994
3995
Guido van Rossumcacfc072002-05-24 19:01:59 +00003996PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00003997 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003998 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003999 0,
4000 0,
4001 0, /* tp_dealloc */
4002 0, /* tp_print */
4003 0, /* tp_getattr */
4004 0, /* tp_setattr */
4005 0, /* tp_compare */
4006 0, /* tp_repr */
4007 0, /* tp_as_number */
4008 0, /* tp_as_sequence */
4009 0, /* tp_as_mapping */
4010 0, /* tp_hash */
4011 0, /* tp_call */
4012 0, /* tp_str */
4013 0, /* tp_getattro */
4014 0, /* tp_setattro */
4015 0, /* tp_as_buffer */
4016 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4017 basestring_doc, /* tp_doc */
4018 0, /* tp_traverse */
4019 0, /* tp_clear */
4020 0, /* tp_richcompare */
4021 0, /* tp_weaklistoffset */
4022 0, /* tp_iter */
4023 0, /* tp_iternext */
4024 0, /* tp_methods */
4025 0, /* tp_members */
4026 0, /* tp_getset */
4027 &PyBaseObject_Type, /* tp_base */
4028 0, /* tp_dict */
4029 0, /* tp_descr_get */
4030 0, /* tp_descr_set */
4031 0, /* tp_dictoffset */
4032 0, /* tp_init */
4033 0, /* tp_alloc */
4034 basestring_new, /* tp_new */
4035 0, /* tp_free */
4036};
4037
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004038PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004039"str(object) -> string\n\
4040\n\
4041Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004042If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004043
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004044PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004045 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004046 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004047 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004048 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004049 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004050 (printfunc)string_print, /* tp_print */
4051 0, /* tp_getattr */
4052 0, /* tp_setattr */
4053 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004054 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004055 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004056 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004057 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004058 (hashfunc)string_hash, /* tp_hash */
4059 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004060 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004061 PyObject_GenericGetAttr, /* tp_getattro */
4062 0, /* tp_setattro */
4063 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004064 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neal Norwitzee3a1b52007-02-25 19:44:48 +00004065 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004066 string_doc, /* tp_doc */
4067 0, /* tp_traverse */
4068 0, /* tp_clear */
4069 (richcmpfunc)string_richcompare, /* tp_richcompare */
4070 0, /* tp_weaklistoffset */
4071 0, /* tp_iter */
4072 0, /* tp_iternext */
4073 string_methods, /* tp_methods */
4074 0, /* tp_members */
4075 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004076 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004077 0, /* tp_dict */
4078 0, /* tp_descr_get */
4079 0, /* tp_descr_set */
4080 0, /* tp_dictoffset */
4081 0, /* tp_init */
4082 0, /* tp_alloc */
4083 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004084 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004085};
4086
4087void
Fred Drakeba096332000-07-09 07:04:36 +00004088PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004089{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004090 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004091 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004092 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004093 if (w == NULL || !PyString_Check(*pv)) {
4094 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004095 *pv = NULL;
4096 return;
4097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004098 v = string_concat((PyStringObject *) *pv, w);
4099 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004100 *pv = v;
4101}
4102
Guido van Rossum013142a1994-08-30 08:19:36 +00004103void
Fred Drakeba096332000-07-09 07:04:36 +00004104PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004105{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004106 PyString_Concat(pv, w);
4107 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004108}
4109
4110
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004111/* The following function breaks the notion that strings are immutable:
4112 it changes the size of a string. We get away with this only if there
4113 is only one module referencing the object. You can also think of it
4114 as creating a new string object and destroying the old one, only
4115 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004116 already be known to some other part of the code...
4117 Note that if there's not enough memory to resize the string, the original
4118 string object at *pv is deallocated, *pv is set to NULL, an "out of
4119 memory" exception is set, and -1 is returned. Else (on success) 0 is
4120 returned, and the value in *pv may or may not be the same as on input.
4121 As always, an extra byte is allocated for a trailing \0 byte (newsize
4122 does *not* include that), and a trailing \0 byte is stored.
4123*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004124
4125int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004126_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004127{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004128 register PyObject *v;
4129 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004130 v = *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004131 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004132 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004133 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004134 Py_DECREF(v);
4135 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004136 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004137 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004138 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004139 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004140 _Py_ForgetReference(v);
4141 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004142 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004143 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004144 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004145 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004146 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004148 _Py_NewReference(*pv);
4149 sv = (PyStringObject *) *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004150 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004151 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004152 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004153 return 0;
4154}
Guido van Rossume5372401993-03-16 12:15:04 +00004155
4156/* Helpers for formatstring */
4157
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004158Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004159getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004160{
Thomas Wouters977485d2006-02-16 15:59:12 +00004161 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004162 if (argidx < arglen) {
4163 (*p_argidx)++;
4164 if (arglen < 0)
4165 return args;
4166 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004167 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004168 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004169 PyErr_SetString(PyExc_TypeError,
4170 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004171 return NULL;
4172}
4173
/* Bit flags collected from the flag characters of a % conversion
 * specifier; each flag character maps to one bit.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */

4186
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004187Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004188formatfloat(char *buf, size_t buflen, int flags,
4189 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004190{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004191 /* fmt = '%#.' + `prec` + `type`
4192 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004193 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004194 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004195 x = PyFloat_AsDouble(v);
4196 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004197 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis68192102007-07-21 06:55:02 +00004198 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004199 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004200 }
Guido van Rossume5372401993-03-16 12:15:04 +00004201 if (prec < 0)
4202 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004203 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4204 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004205 /* Worst case length calc to ensure no buffer overrun:
4206
4207 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004208 fmt = %#.<prec>g
4209 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004210 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004211 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004212
4213 'f' formats:
4214 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4215 len = 1 + 50 + 1 + prec = 52 + prec
4216
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004217 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004218 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004219
4220 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004221 if (((type == 'g' || type == 'G') &&
4222 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004223 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004224 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004225 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004226 return -1;
4227 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004228 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4229 (flags&F_ALT) ? "#" : "",
4230 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004231 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004232 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004233}
4234
Tim Peters38fd5b62000-09-21 05:43:11 +00004235/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4236 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4237 * Python's regular ints.
4238 * Return value: a new PyString*, or NULL if error.
4239 * . *pbuf is set to point into it,
4240 * *plen set to the # of chars following that.
4241 * Caller must decref it when done using pbuf.
4242 * The string starting at *pbuf is of the form
4243 * "-"? ("0x" | "0X")? digit+
4244 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004245 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004246 * There will be at least prec digits, zero-filled on the left if
4247 * necessary to get that many.
4248 * val object to be converted
4249 * flags bitmask of format flags; only F_ALT is looked at
4250 * prec minimum number of digits; 0-fill on left if needed
4251 * type a character in [duoxX]; u acts the same as d
4252 *
4253 * CAUTION: o, x and X conversions on regular ints can never
4254 * produce a '-' sign, but can for Python's unbounded ints.
4255 */
4256PyObject*
4257_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4258 char **pbuf, int *plen)
4259{
4260 PyObject *result = NULL;
4261 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004262 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004263 int sign; /* 1 if '-', else 0 */
4264 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004265 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004266 int numdigits; /* len == numnondigits + numdigits */
4267 int numnondigits = 0;
4268
4269 switch (type) {
4270 case 'd':
4271 case 'u':
Martin v. Löwis68192102007-07-21 06:55:02 +00004272 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004273 break;
4274 case 'o':
Martin v. Löwis68192102007-07-21 06:55:02 +00004275 result = Py_Type(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004276 break;
4277 case 'x':
4278 case 'X':
4279 numnondigits = 2;
Martin v. Löwis68192102007-07-21 06:55:02 +00004280 result = Py_Type(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004281 break;
4282 default:
4283 assert(!"'type' not in [duoxX]");
4284 }
4285 if (!result)
4286 return NULL;
4287
Neal Norwitz56423e52006-08-13 18:11:08 +00004288 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004289 if (!buf) {
4290 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004291 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004292 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004293
Tim Peters38fd5b62000-09-21 05:43:11 +00004294 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis68192102007-07-21 06:55:02 +00004295 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004296 PyErr_BadInternalCall();
4297 return NULL;
4298 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004299 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004300 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004301 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4302 return NULL;
4303 }
4304 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004305 if (buf[len-1] == 'L') {
4306 --len;
4307 buf[len] = '\0';
4308 }
4309 sign = buf[0] == '-';
4310 numnondigits += sign;
4311 numdigits = len - numnondigits;
4312 assert(numdigits > 0);
4313
Tim Petersfff53252001-04-12 18:38:48 +00004314 /* Get rid of base marker unless F_ALT */
4315 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004316 /* Need to skip 0x, 0X or 0. */
4317 int skipped = 0;
4318 switch (type) {
4319 case 'o':
4320 assert(buf[sign] == '0');
4321 /* If 0 is only digit, leave it alone. */
4322 if (numdigits > 1) {
4323 skipped = 1;
4324 --numdigits;
4325 }
4326 break;
4327 case 'x':
4328 case 'X':
4329 assert(buf[sign] == '0');
4330 assert(buf[sign + 1] == 'x');
4331 skipped = 2;
4332 numnondigits -= 2;
4333 break;
4334 }
4335 if (skipped) {
4336 buf += skipped;
4337 len -= skipped;
4338 if (sign)
4339 buf[0] = '-';
4340 }
4341 assert(len == numnondigits + numdigits);
4342 assert(numdigits > 0);
4343 }
4344
4345 /* Fill with leading zeroes to meet minimum width. */
4346 if (prec > numdigits) {
4347 PyObject *r1 = PyString_FromStringAndSize(NULL,
4348 numnondigits + prec);
4349 char *b1;
4350 if (!r1) {
4351 Py_DECREF(result);
4352 return NULL;
4353 }
4354 b1 = PyString_AS_STRING(r1);
4355 for (i = 0; i < numnondigits; ++i)
4356 *b1++ = *buf++;
4357 for (i = 0; i < prec - numdigits; i++)
4358 *b1++ = '0';
4359 for (i = 0; i < numdigits; i++)
4360 *b1++ = *buf++;
4361 *b1 = '\0';
4362 Py_DECREF(result);
4363 result = r1;
4364 buf = PyString_AS_STRING(result);
4365 len = numnondigits + prec;
4366 }
4367
4368 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004369 if (type == 'X') {
4370 /* Need to convert all lower case letters to upper case.
4371 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004372 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004373 if (buf[i] >= 'a' && buf[i] <= 'x')
4374 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004375 }
4376 *pbuf = buf;
4377 *plen = len;
4378 return result;
4379}
4380
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004381Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004382formatint(char *buf, size_t buflen, int flags,
4383 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004384{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004385 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004386 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4387 + 1 + 1 = 24 */
4388 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004389 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004390 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004391
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004392 x = PyInt_AsLong(v);
4393 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004394 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00004395 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004396 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004397 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004398 if (x < 0 && type == 'u') {
4399 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004400 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004401 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4402 sign = "-";
4403 else
4404 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004405 if (prec < 0)
4406 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004407
4408 if ((flags & F_ALT) &&
4409 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004410 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004411 * of issues that cause pain:
4412 * - when 0 is being converted, the C standard leaves off
4413 * the '0x' or '0X', which is inconsistent with other
4414 * %#x/%#X conversions and inconsistent with Python's
4415 * hex() function
4416 * - there are platforms that violate the standard and
4417 * convert 0 with the '0x' or '0X'
4418 * (Metrowerks, Compaq Tru64)
4419 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004420 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004421 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004422 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004423 * We can achieve the desired consistency by inserting our
4424 * own '0x' or '0X' prefix, and substituting %x/%X in place
4425 * of %#x/%#X.
4426 *
4427 * Note that this is the same approach as used in
4428 * formatint() in unicodeobject.c
4429 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004430 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4431 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004432 }
4433 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004434 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4435 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004436 prec, type);
4437 }
4438
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004439 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4440 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004441 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004442 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004443 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004444 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004445 return -1;
4446 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004447 if (sign[0])
4448 PyOS_snprintf(buf, buflen, fmt, -x);
4449 else
4450 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004451 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004452}
4453
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004454Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004455formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004456{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004457 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004458 if (PyString_Check(v)) {
4459 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004460 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004461 }
4462 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004463 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004464 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004465 }
4466 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004467 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004468}
4469
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is parenthesized so the cast binds as a single operand
   wherever the macro is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004479
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004480PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004481PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004482{
4483 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004484 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004485 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004486 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004487 PyObject *result, *orig_args;
4488#ifdef Py_USING_UNICODE
4489 PyObject *v, *w;
4490#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004491 PyObject *dict = NULL;
4492 if (format == NULL || !PyString_Check(format) || args == NULL) {
4493 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004494 return NULL;
4495 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004496 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004497 fmt = PyString_AS_STRING(format);
4498 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004499 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004500 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004501 if (result == NULL)
4502 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004503 res = PyString_AsString(result);
4504 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004505 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004506 argidx = 0;
4507 }
4508 else {
4509 arglen = -1;
4510 argidx = -2;
4511 }
Martin v. Löwis68192102007-07-21 06:55:02 +00004512 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004513 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004514 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004515 while (--fmtcnt >= 0) {
4516 if (*fmt != '%') {
4517 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004518 rescnt = fmtcnt + 100;
4519 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004521 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004522 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004523 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004524 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004525 }
4526 *res++ = *fmt++;
4527 }
4528 else {
4529 /* Got a format specifier */
4530 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004531 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004532 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004533 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004534 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004535 PyObject *v = NULL;
4536 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004537 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004538 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004539 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004540 char formatbuf[FORMATBUFLEN];
4541 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004542#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004543 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004544 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004545#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004546
Guido van Rossumda9c2711996-12-05 21:58:58 +00004547 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004548 if (*fmt == '(') {
4549 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004550 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004551 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004552 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004553
4554 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004555 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004556 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004557 goto error;
4558 }
4559 ++fmt;
4560 --fmtcnt;
4561 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004562 /* Skip over balanced parentheses */
4563 while (pcount > 0 && --fmtcnt >= 0) {
4564 if (*fmt == ')')
4565 --pcount;
4566 else if (*fmt == '(')
4567 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004568 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004569 }
4570 keylen = fmt - keystart - 1;
4571 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004572 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004573 "incomplete format key");
4574 goto error;
4575 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004576 key = PyString_FromStringAndSize(keystart,
4577 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004578 if (key == NULL)
4579 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004580 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004581 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004582 args_owned = 0;
4583 }
4584 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004585 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004586 if (args == NULL) {
4587 goto error;
4588 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004589 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004590 arglen = -1;
4591 argidx = -2;
4592 }
Guido van Rossume5372401993-03-16 12:15:04 +00004593 while (--fmtcnt >= 0) {
4594 switch (c = *fmt++) {
4595 case '-': flags |= F_LJUST; continue;
4596 case '+': flags |= F_SIGN; continue;
4597 case ' ': flags |= F_BLANK; continue;
4598 case '#': flags |= F_ALT; continue;
4599 case '0': flags |= F_ZERO; continue;
4600 }
4601 break;
4602 }
4603 if (c == '*') {
4604 v = getnextarg(args, arglen, &argidx);
4605 if (v == NULL)
4606 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004607 if (!PyInt_Check(v)) {
4608 PyErr_SetString(PyExc_TypeError,
4609 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004610 goto error;
4611 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004612 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004613 if (width < 0) {
4614 flags |= F_LJUST;
4615 width = -width;
4616 }
Guido van Rossume5372401993-03-16 12:15:04 +00004617 if (--fmtcnt >= 0)
4618 c = *fmt++;
4619 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004620 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004621 width = c - '0';
4622 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004623 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004624 if (!isdigit(c))
4625 break;
4626 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004627 PyErr_SetString(
4628 PyExc_ValueError,
4629 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004630 goto error;
4631 }
4632 width = width*10 + (c - '0');
4633 }
4634 }
4635 if (c == '.') {
4636 prec = 0;
4637 if (--fmtcnt >= 0)
4638 c = *fmt++;
4639 if (c == '*') {
4640 v = getnextarg(args, arglen, &argidx);
4641 if (v == NULL)
4642 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004643 if (!PyInt_Check(v)) {
4644 PyErr_SetString(
4645 PyExc_TypeError,
4646 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004647 goto error;
4648 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004649 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004650 if (prec < 0)
4651 prec = 0;
4652 if (--fmtcnt >= 0)
4653 c = *fmt++;
4654 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004655 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004656 prec = c - '0';
4657 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004658 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004659 if (!isdigit(c))
4660 break;
4661 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004662 PyErr_SetString(
4663 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004664 "prec too big");
4665 goto error;
4666 }
4667 prec = prec*10 + (c - '0');
4668 }
4669 }
4670 } /* prec */
4671 if (fmtcnt >= 0) {
4672 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004673 if (--fmtcnt >= 0)
4674 c = *fmt++;
4675 }
4676 }
4677 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004678 PyErr_SetString(PyExc_ValueError,
4679 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004680 goto error;
4681 }
4682 if (c != '%') {
4683 v = getnextarg(args, arglen, &argidx);
4684 if (v == NULL)
4685 goto error;
4686 }
4687 sign = 0;
4688 fill = ' ';
4689 switch (c) {
4690 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004691 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004692 len = 1;
4693 break;
4694 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004695#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004696 if (PyUnicode_Check(v)) {
4697 fmt = fmt_start;
4698 argidx = argidx_start;
4699 goto unicode;
4700 }
Georg Brandld45014b2005-10-01 17:06:00 +00004701#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004702 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004703#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004704 if (temp != NULL && PyUnicode_Check(temp)) {
4705 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004706 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004707 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004708 goto unicode;
4709 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004710#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004711 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004712 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004713 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004714 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004715 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004716 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004717 if (!PyString_Check(temp)) {
4718 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004719 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004720 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004721 goto error;
4722 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004723 pbuf = PyString_AS_STRING(temp);
4724 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004725 if (prec >= 0 && len > prec)
4726 len = prec;
4727 break;
4728 case 'i':
4729 case 'd':
4730 case 'u':
4731 case 'o':
4732 case 'x':
4733 case 'X':
4734 if (c == 'i')
4735 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004736 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004737 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004738 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004739 prec, c, &pbuf, &ilen);
4740 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004741 if (!temp)
4742 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004743 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004744 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004745 else {
4746 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004747 len = formatint(pbuf,
4748 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004749 flags, prec, c, v);
4750 if (len < 0)
4751 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004752 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004753 }
4754 if (flags & F_ZERO)
4755 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004756 break;
4757 case 'e':
4758 case 'E':
4759 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004760 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004761 case 'g':
4762 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004763 if (c == 'F')
4764 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004765 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004766 len = formatfloat(pbuf, sizeof(formatbuf),
4767 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004768 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004769 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004770 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004771 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004772 fill = '0';
4773 break;
4774 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004775#ifdef Py_USING_UNICODE
4776 if (PyUnicode_Check(v)) {
4777 fmt = fmt_start;
4778 argidx = argidx_start;
4779 goto unicode;
4780 }
4781#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004782 pbuf = formatbuf;
4783 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004784 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004785 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004786 break;
4787 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004788 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004789 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004790 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004791 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004792 (Py_ssize_t)(fmt - 1 -
4793 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004794 goto error;
4795 }
4796 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004797 if (*pbuf == '-' || *pbuf == '+') {
4798 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004799 len--;
4800 }
4801 else if (flags & F_SIGN)
4802 sign = '+';
4803 else if (flags & F_BLANK)
4804 sign = ' ';
4805 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004806 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004807 }
4808 if (width < len)
4809 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004810 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004811 reslen -= rescnt;
4812 rescnt = width + fmtcnt + 100;
4813 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004814 if (reslen < 0) {
4815 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004816 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004817 return PyErr_NoMemory();
4818 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004819 if (_PyString_Resize(&result, reslen) < 0) {
4820 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004821 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004822 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004823 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004824 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004825 }
4826 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004827 if (fill != ' ')
4828 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004829 rescnt--;
4830 if (width > len)
4831 width--;
4832 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004833 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4834 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004835 assert(pbuf[1] == c);
4836 if (fill != ' ') {
4837 *res++ = *pbuf++;
4838 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004839 }
Tim Petersfff53252001-04-12 18:38:48 +00004840 rescnt -= 2;
4841 width -= 2;
4842 if (width < 0)
4843 width = 0;
4844 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004845 }
4846 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004847 do {
4848 --rescnt;
4849 *res++ = fill;
4850 } while (--width > len);
4851 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004852 if (fill == ' ') {
4853 if (sign)
4854 *res++ = sign;
4855 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004856 (c == 'x' || c == 'X')) {
4857 assert(pbuf[0] == '0');
4858 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004859 *res++ = *pbuf++;
4860 *res++ = *pbuf++;
4861 }
4862 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004863 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004864 res += len;
4865 rescnt -= len;
4866 while (--width >= len) {
4867 --rescnt;
4868 *res++ = ' ';
4869 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004870 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004871 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004872 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004873 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004874 goto error;
4875 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004876 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004877 } /* '%' */
4878 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004879 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004880 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004881 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004882 goto error;
4883 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004884 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004885 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004886 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004887 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004888 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004889
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004890#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004891 unicode:
4892 if (args_owned) {
4893 Py_DECREF(args);
4894 args_owned = 0;
4895 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004896 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004897 if (PyTuple_Check(orig_args) && argidx > 0) {
4898 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004899 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004900 v = PyTuple_New(n);
4901 if (v == NULL)
4902 goto error;
4903 while (--n >= 0) {
4904 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4905 Py_INCREF(w);
4906 PyTuple_SET_ITEM(v, n, w);
4907 }
4908 args = v;
4909 } else {
4910 Py_INCREF(orig_args);
4911 args = orig_args;
4912 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004913 args_owned = 1;
4914 /* Take what we have of the result and let the Unicode formatting
4915 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004916 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004917 if (_PyString_Resize(&result, rescnt))
4918 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004919 fmtcnt = PyString_GET_SIZE(format) - \
4920 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004921 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4922 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004923 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004924 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004925 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004926 if (v == NULL)
4927 goto error;
4928 /* Paste what we have (result) to what the Unicode formatting
4929 function returned (v) and return the result (or error) */
4930 w = PyUnicode_Concat(result, v);
4931 Py_DECREF(result);
4932 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004933 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004934 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004935#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004936
Guido van Rossume5372401993-03-16 12:15:04 +00004937 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004938 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004939 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004940 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004941 }
Guido van Rossume5372401993-03-16 12:15:04 +00004942 return NULL;
4943}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004944
Guido van Rossum2a61e741997-01-18 07:55:05 +00004945void
Fred Drakeba096332000-07-09 07:04:36 +00004946PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004947{
4948 register PyStringObject *s = (PyStringObject *)(*p);
4949 PyObject *t;
4950 if (s == NULL || !PyString_Check(s))
4951 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004952 /* If it's a string subclass, we don't really know what putting
4953 it in the interned dict might do. */
4954 if (!PyString_CheckExact(s))
4955 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004956 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004957 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004958 if (interned == NULL) {
4959 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004960 if (interned == NULL) {
4961 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004962 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004963 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004964 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004965 t = PyDict_GetItem(interned, (PyObject *)s);
4966 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004967 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004968 Py_DECREF(*p);
4969 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004970 return;
4971 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004972
Armin Rigo79f7ad22004-08-07 19:27:39 +00004973 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004974 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004975 return;
4976 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004977 /* The two references in interned are not counted by refcnt.
4978 The string deallocator will take care of this */
Martin v. Löwis68192102007-07-21 06:55:02 +00004979 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004980 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004981}
4982
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004983void
4984PyString_InternImmortal(PyObject **p)
4985{
4986 PyString_InternInPlace(p);
4987 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4988 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4989 Py_INCREF(*p);
4990 }
4991}
4992
Guido van Rossum2a61e741997-01-18 07:55:05 +00004993
4994PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004995PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004996{
4997 PyObject *s = PyString_FromString(cp);
4998 if (s == NULL)
4999 return NULL;
5000 PyString_InternInPlace(&s);
5001 return s;
5002}
5003
Guido van Rossum8cf04761997-08-02 02:57:45 +00005004void
Fred Drakeba096332000-07-09 07:04:36 +00005005PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005006{
5007 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005008 for (i = 0; i < UCHAR_MAX + 1; i++) {
5009 Py_XDECREF(characters[i]);
5010 characters[i] = NULL;
5011 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005012 Py_XDECREF(nullstring);
5013 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005014}
Barry Warsawa903ad982001-02-23 16:40:48 +00005015
Barry Warsawa903ad982001-02-23 16:40:48 +00005016void _Py_ReleaseInternedStrings(void)
5017{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005018 PyObject *keys;
5019 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005020 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005021 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005022
5023 if (interned == NULL || !PyDict_Check(interned))
5024 return;
5025 keys = PyDict_Keys(interned);
5026 if (keys == NULL || !PyList_Check(keys)) {
5027 PyErr_Clear();
5028 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005029 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005030
5031 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5032 detector, interned strings are not forcibly deallocated; rather, we
5033 give them their stolen references back, and then clear and DECREF
5034 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005035
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005036 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005037 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5038 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005039 for (i = 0; i < n; i++) {
5040 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5041 switch (s->ob_sstate) {
5042 case SSTATE_NOT_INTERNED:
5043 /* XXX Shouldn't happen */
5044 break;
5045 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005046 Py_Refcnt(s) += 1;
5047 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005048 break;
5049 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005050 Py_Refcnt(s) += 2;
5051 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005052 break;
5053 default:
5054 Py_FatalError("Inconsistent interned string state.");
5055 }
5056 s->ob_sstate = SSTATE_NOT_INTERNED;
5057 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005058 fprintf(stderr, "total size of all interned strings: "
5059 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5060 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005061 Py_DECREF(keys);
5062 PyDict_Clear(interned);
5063 Py_DECREF(interned);
5064 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005065}