blob: a0e2837a9478fb2224a94105f053896d39d7d4ff [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
/* Allocation statistics, only compiled in when COUNT_ALLOCS is defined:
   null_strings / one_strings are bumped each time the string constructors
   below hand out the cached empty string / a cached one-character string
   instead of allocating a new object. */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
/* Cache of shared one-character string objects, indexed by the byte value
   (masked with UCHAR_MAX).  Entries start out NULL and are filled in by
   the string constructors below the first time such a string is built. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the first empty string is created. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000424 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +0000504 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
Martin v. Löwis68192102007-07-21 06:55:02 +0000524 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000536 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000619 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000620 c = (c<<3) + *s++ - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000621 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000627 if (s+1 < end &&
628 isxdigit(Py_CHARMASK(s[0])) &&
629 isxdigit(Py_CHARMASK(s[1])))
630 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 unsigned int x = 0;
632 c = Py_CHARMASK(*s);
633 s++;
634 if (isdigit(c))
635 x = c - '0';
636 else if (islower(c))
637 x = 10 + c - 'a';
638 else
639 x = 10 + c - 'A';
640 x = x << 4;
641 c = Py_CHARMASK(*s);
642 s++;
643 if (isdigit(c))
644 x += c - '0';
645 else if (islower(c))
646 x += 10 + c - 'a';
647 else
648 x += 10 + c - 'A';
649 *p++ = x;
650 break;
651 }
652 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000653 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657 if (strcmp(errors, "replace") == 0) {
658 *p++ = '?';
659 } else if (strcmp(errors, "ignore") == 0)
660 /* do nothing */;
661 else {
662 PyErr_Format(PyExc_ValueError,
663 "decoding error; "
664 "unknown error handling code: %.400s",
665 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000666 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 }
668#ifndef Py_USING_UNICODE
669 case 'u':
670 case 'U':
671 case 'N':
672 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000673 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000674 "Unicode escapes not legal "
675 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#endif
679 default:
680 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000681 s--;
682 goto non_esc; /* an arbitry number of unescaped
683 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 }
685 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000686 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000687 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000688 return v;
689 failed:
690 Py_DECREF(v);
691 return NULL;
692}
693
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000694/* -------------------------------------------------------------------- */
695/* object api */
696
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698string_getsize(register PyObject *op)
699{
700 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000701 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702 if (PyString_AsStringAndSize(op, &s, &len))
703 return -1;
704 return len;
705}
706
707static /*const*/ char *
708string_getbuffer(register PyObject *op)
709{
710 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000711 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712 if (PyString_AsStringAndSize(op, &s, &len))
713 return NULL;
714 return s;
715}
716
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000718PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000720 if (!PyString_Check(op))
721 return string_getsize(op);
Martin v. Löwis68192102007-07-21 06:55:02 +0000722 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723}
724
725/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000726PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728 if (!PyString_Check(op))
729 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731}
732
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733int
734PyString_AsStringAndSize(register PyObject *obj,
735 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000736 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737{
738 if (s == NULL) {
739 PyErr_BadInternalCall();
740 return -1;
741 }
742
743 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000744#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000745 if (PyUnicode_Check(obj)) {
746 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
747 if (obj == NULL)
748 return -1;
749 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000750 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#endif
752 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000753 PyErr_Format(PyExc_TypeError,
754 "expected string or Unicode object, "
Martin v. Löwis68192102007-07-21 06:55:02 +0000755 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000756 return -1;
757 }
758 }
759
760 *s = PyString_AS_STRING(obj);
761 if (len != NULL)
762 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000763 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000764 PyErr_SetString(PyExc_TypeError,
765 "expected string without null bytes");
766 return -1;
767 }
768 return 0;
769}
770
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000772/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
Fredrik Lundha50d2012006-05-26 17:04:58 +0000774#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000775
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000776#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000777#define STRINGLIB_LEN PyString_GET_SIZE
778#define STRINGLIB_NEW PyString_FromStringAndSize
779#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000780
Fredrik Lundhb9479482006-05-26 17:22:38 +0000781#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
Fredrik Lundha50d2012006-05-26 17:04:58 +0000783#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000785#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000786#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000787#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000788
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790static int
Fred Drakeba096332000-07-09 07:04:36 +0000791string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792{
Brett Cannon01531592007-09-17 03:28:34 +0000793 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000796
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000797 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000798 if (! PyString_CheckExact(op)) {
799 int ret;
800 /* A str subclass may have its own __str__ method. */
801 op = (PyStringObject *) PyObject_Str((PyObject *)op);
802 if (op == NULL)
803 return -1;
804 ret = string_print(op, fp, flags);
805 Py_DECREF(op);
806 return ret;
807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000808 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000809 char *data = op->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +0000810 Py_ssize_t size = Py_Size(op);
Brett Cannon01531592007-09-17 03:28:34 +0000811 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000812 while (size > INT_MAX) {
813 /* Very long strings cannot be written atomically.
814 * But don't write exactly INT_MAX bytes at a time
815 * to avoid memory aligment issues.
816 */
817 const int chunk_size = INT_MAX & ~0x3FFF;
818 fwrite(data, 1, chunk_size, fp);
819 data += chunk_size;
820 size -= chunk_size;
821 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000822#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000823 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000824#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000825 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000826#endif
Brett Cannon01531592007-09-17 03:28:34 +0000827 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000828 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830
Thomas Wouters7e474022000-07-16 12:04:32 +0000831 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 quote = '\'';
Martin v. Löwis68192102007-07-21 06:55:02 +0000833 if (memchr(op->ob_sval, '\'', Py_Size(op)) &&
834 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000835 quote = '"';
836
Brett Cannon01531592007-09-17 03:28:34 +0000837 str_len = Py_Size(op);
838 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000840 for (i = 0; i < str_len; i++) {
841 /* Since strings are immutable and the caller should have a
842 reference, accessing the interal buffer should not be an issue
843 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000845 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000851 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000852 fprintf(fp, "\\r");
853 else if (c < ' ' || c >= 0x7f)
854 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000855 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000856 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000858 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000859 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000860 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000863PyObject *
864PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000866 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis68192102007-07-21 06:55:02 +0000867 size_t newsize = 2 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000868 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +0000869 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000870 PyErr_SetString(PyExc_OverflowError,
871 "string is too large to make repr");
Guido van Rossum9b847b42007-11-06 23:32:56 +0000872 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000873 }
874 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000875 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000876 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 }
878 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000879 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880 register char c;
881 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000882 int quote;
883
Thomas Wouters7e474022000-07-16 12:04:32 +0000884 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000885 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000886 if (smartquotes &&
Martin v. Löwis68192102007-07-21 06:55:02 +0000887 memchr(op->ob_sval, '\'', Py_Size(op)) &&
888 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000889 quote = '"';
890
Tim Peters9161c8b2001-12-03 01:55:38 +0000891 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000892 *p++ = quote;
Martin v. Löwis68192102007-07-21 06:55:02 +0000893 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000894 /* There's at least enough room for a hex escape
895 and a closing quote. */
896 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000898 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000900 else if (c == '\t')
901 *p++ = '\\', *p++ = 't';
902 else if (c == '\n')
903 *p++ = '\\', *p++ = 'n';
904 else if (c == '\r')
905 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000906 else if (c < ' ' || c >= 0x7f) {
907 /* For performance, we don't want to call
908 PyOS_snprintf here (extra layers of
909 function call). */
910 sprintf(p, "\\x%02x", c & 0xff);
911 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000912 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000913 else
914 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000916 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000917 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000918 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000919 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000920 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000921 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923}
924
Guido van Rossum189f1df2001-05-01 16:51:53 +0000925static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000926string_repr(PyObject *op)
927{
928 return PyString_Repr(op, 1);
929}
930
931static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000932string_str(PyObject *s)
933{
Tim Petersc9933152001-10-16 20:18:24 +0000934 assert(PyString_Check(s));
935 if (PyString_CheckExact(s)) {
936 Py_INCREF(s);
937 return s;
938 }
939 else {
940 /* Subtype -- return genuine string with the same value. */
941 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis68192102007-07-21 06:55:02 +0000942 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000943 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000944}
945
Martin v. Löwis18e16552006-02-15 17:27:45 +0000946static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000947string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948{
Martin v. Löwis68192102007-07-21 06:55:02 +0000949 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000950}
951
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000953string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954{
Andrew Dalke598710c2006-05-25 18:18:39 +0000955 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000956 register PyStringObject *op;
957 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000958#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000959 if (PyUnicode_Check(bb))
960 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000961#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000962 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000963 "cannot concatenate 'str' and '%.200s' objects",
Martin v. Löwis68192102007-07-21 06:55:02 +0000964 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 return NULL;
966 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 /* Optimize cases with empty left or right operand */
Martin v. Löwis68192102007-07-21 06:55:02 +0000969 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000970 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis68192102007-07-21 06:55:02 +0000971 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000972 Py_INCREF(bb);
973 return bb;
974 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000975 Py_INCREF(a);
976 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977 }
Martin v. Löwis68192102007-07-21 06:55:02 +0000978 size = Py_Size(a) + Py_Size(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000979 if (size < 0) {
980 PyErr_SetString(PyExc_OverflowError,
981 "strings are too large to concat");
982 return NULL;
983 }
984
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000985 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000986 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000987 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000989 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000990 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000991 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis68192102007-07-21 06:55:02 +0000992 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
993 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000994 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000995 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996#undef b
997}
998
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001000string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001002 register Py_ssize_t i;
1003 register Py_ssize_t j;
1004 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001006 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007 if (n < 0)
1008 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001009 /* watch out for overflows: the size can overflow int,
1010 * and the # of bytes needed can overflow size_t
1011 */
Martin v. Löwis68192102007-07-21 06:55:02 +00001012 size = Py_Size(a) * n;
1013 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001014 PyErr_SetString(PyExc_OverflowError,
1015 "repeated string is too long");
1016 return NULL;
1017 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001018 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 Py_INCREF(a);
1020 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001021 }
Tim Peterse7c05322004-06-27 17:24:49 +00001022 nbytes = (size_t)size;
1023 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001024 PyErr_SetString(PyExc_OverflowError,
1025 "repeated string is too long");
1026 return NULL;
1027 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001028 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001029 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001030 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001031 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001032 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001033 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001034 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001035 op->ob_sval[size] = '\0';
Martin v. Löwis68192102007-07-21 06:55:02 +00001036 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001037 memset(op->ob_sval, a->ob_sval[0] , n);
1038 return (PyObject *) op;
1039 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001040 i = 0;
1041 if (i < size) {
Martin v. Löwis68192102007-07-21 06:55:02 +00001042 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1043 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001044 }
1045 while (i < size) {
1046 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001047 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001048 i += j;
1049 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001050 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051}
1052
1053/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1054
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001056string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001057 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001058 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059{
1060 if (i < 0)
1061 i = 0;
1062 if (j < 0)
1063 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis68192102007-07-21 06:55:02 +00001064 if (j > Py_Size(a))
1065 j = Py_Size(a);
1066 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001067 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001068 Py_INCREF(a);
1069 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070 }
1071 if (j < i)
1072 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001074}
1075
Guido van Rossum9284a572000-03-07 15:53:43 +00001076static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001077string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001078{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001079 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001080#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 if (PyUnicode_Check(sub_obj))
1082 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001083#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001084 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001085 PyErr_Format(PyExc_TypeError,
1086 "'in <string>' requires string as left operand, "
Martin v. Löwis68192102007-07-21 06:55:02 +00001087 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001088 return -1;
1089 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001090 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001091
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001092 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001093}
1094
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001095static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001096string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001098 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099 PyObject *v;
Martin v. Löwis68192102007-07-21 06:55:02 +00001100 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001101 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001102 return NULL;
1103 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001104 pchar = a->ob_sval[i];
1105 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001106 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001107 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001108 else {
1109#ifdef COUNT_ALLOCS
1110 one_strings++;
1111#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001112 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001113 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001114 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115}
1116
Martin v. Löwiscd353062001-05-24 16:56:35 +00001117static PyObject*
1118string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001119{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001120 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001121 Py_ssize_t len_a, len_b;
1122 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 PyObject *result;
1124
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001125 /* Make sure both arguments are strings. */
1126 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001127 result = Py_NotImplemented;
1128 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001129 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001130 if (a == b) {
1131 switch (op) {
1132 case Py_EQ:case Py_LE:case Py_GE:
1133 result = Py_True;
1134 goto out;
1135 case Py_NE:case Py_LT:case Py_GT:
1136 result = Py_False;
1137 goto out;
1138 }
1139 }
1140 if (op == Py_EQ) {
1141 /* Supporting Py_NE here as well does not save
1142 much time, since Py_NE is rarely used. */
Martin v. Löwis68192102007-07-21 06:55:02 +00001143 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001144 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis68192102007-07-21 06:55:02 +00001145 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001146 result = Py_True;
1147 } else {
1148 result = Py_False;
1149 }
1150 goto out;
1151 }
Martin v. Löwis68192102007-07-21 06:55:02 +00001152 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001153 min_len = (len_a < len_b) ? len_a : len_b;
1154 if (min_len > 0) {
1155 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1156 if (c==0)
1157 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001158 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001159 c = 0;
1160 if (c == 0)
1161 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1162 switch (op) {
1163 case Py_LT: c = c < 0; break;
1164 case Py_LE: c = c <= 0; break;
1165 case Py_EQ: assert(0); break; /* unreachable */
1166 case Py_NE: c = c != 0; break;
1167 case Py_GT: c = c > 0; break;
1168 case Py_GE: c = c >= 0; break;
1169 default:
1170 result = Py_NotImplemented;
1171 goto out;
1172 }
1173 result = c ? Py_True : Py_False;
1174 out:
1175 Py_INCREF(result);
1176 return result;
1177}
1178
1179int
1180_PyString_Eq(PyObject *o1, PyObject *o2)
1181{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001182 PyStringObject *a = (PyStringObject*) o1;
1183 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis68192102007-07-21 06:55:02 +00001184 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001185 && *a->ob_sval == *b->ob_sval
Martin v. Löwis68192102007-07-21 06:55:02 +00001186 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001187}
1188
Guido van Rossum9bfef441993-03-29 10:43:31 +00001189static long
Fred Drakeba096332000-07-09 07:04:36 +00001190string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001191{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001192 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001193 register unsigned char *p;
1194 register long x;
1195
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001196 if (a->ob_shash != -1)
1197 return a->ob_shash;
Martin v. Löwis68192102007-07-21 06:55:02 +00001198 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001199 p = (unsigned char *) a->ob_sval;
1200 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001201 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001202 x = (1000003*x) ^ *p++;
Martin v. Löwis68192102007-07-21 06:55:02 +00001203 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001204 if (x == -1)
1205 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001206 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001207 return x;
1208}
1209
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001210static PyObject*
1211string_subscript(PyStringObject* self, PyObject* item)
1212{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001213 if (PyIndex_Check(item)) {
1214 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001215 if (i == -1 && PyErr_Occurred())
1216 return NULL;
1217 if (i < 0)
1218 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001219 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 }
1221 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001222 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001223 char* source_buf;
1224 char* result_buf;
1225 PyObject* result;
1226
Tim Petersae1d0c92006-03-17 03:29:34 +00001227 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 PyString_GET_SIZE(self),
1229 &start, &stop, &step, &slicelength) < 0) {
1230 return NULL;
1231 }
1232
1233 if (slicelength <= 0) {
1234 return PyString_FromStringAndSize("", 0);
1235 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001236 else if (start == 0 && step == 1 &&
1237 slicelength == PyString_GET_SIZE(self) &&
1238 PyString_CheckExact(self)) {
1239 Py_INCREF(self);
1240 return (PyObject *)self;
1241 }
1242 else if (step == 1) {
1243 return PyString_FromStringAndSize(
1244 PyString_AS_STRING(self) + start,
1245 slicelength);
1246 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001247 else {
1248 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001249 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001250 if (result_buf == NULL)
1251 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001252
Tim Petersae1d0c92006-03-17 03:29:34 +00001253 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001254 cur += step, i++) {
1255 result_buf[i] = source_buf[cur];
1256 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001257
1258 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001259 slicelength);
1260 PyMem_Free(result_buf);
1261 return result;
1262 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001263 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001264 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001265 PyErr_Format(PyExc_TypeError,
1266 "string indices must be integers, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00001267 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001268 return NULL;
1269 }
1270}
1271
Martin v. Löwis18e16552006-02-15 17:27:45 +00001272static Py_ssize_t
1273string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274{
1275 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001276 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001277 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001278 return -1;
1279 }
1280 *ptr = (void *)self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001281 return Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001282}
1283
Martin v. Löwis18e16552006-02-15 17:27:45 +00001284static Py_ssize_t
1285string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001286{
Guido van Rossum045e6881997-09-08 18:30:11 +00001287 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001288 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001289 return -1;
1290}
1291
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292static Py_ssize_t
1293string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001294{
1295 if ( lenp )
Martin v. Löwis68192102007-07-21 06:55:02 +00001296 *lenp = Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297 return 1;
1298}
1299
Martin v. Löwis18e16552006-02-15 17:27:45 +00001300static Py_ssize_t
1301string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001302{
1303 if ( index != 0 ) {
1304 PyErr_SetString(PyExc_SystemError,
1305 "accessing non-existent string segment");
1306 return -1;
1307 }
1308 *ptr = self->ob_sval;
Martin v. Löwis68192102007-07-21 06:55:02 +00001309 return Py_Size(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001310}
1311
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001312static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001313 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001314 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001315 (ssizeargfunc)string_repeat, /*sq_repeat*/
1316 (ssizeargfunc)string_item, /*sq_item*/
1317 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001318 0, /*sq_ass_item*/
1319 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001320 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001321};
1322
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001323static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001324 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001325 (binaryfunc)string_subscript,
1326 0,
1327};
1328
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001329static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001330 (readbufferproc)string_buffer_getreadbuf,
1331 (writebufferproc)string_buffer_getwritebuf,
1332 (segcountproc)string_buffer_getsegcount,
1333 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001334};
1335
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336
1337
/* Strip variants; the values double as indices into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip variant i: the format string with its leading
   "|O:" (3 characters) skipped. */
#define STRIPNAME(i) (stripformat[i] + 3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001346
Andrew Dalke525eab32006-05-26 14:00:45 +00001347
/* Evaluates to non-zero iff the `length` bytes of `target` starting at
   `offset` equal the first `length` bytes of `pattern`.  The first and
   last bytes are compared directly before memcmp() checks the rest.

   Don't call if length < 2 (the memcmp length is length-2).
   Every argument is parenthesized in the expansion so that compound
   expressions can be passed safely; note that arguments are still
   evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&    \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1353
1354
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so compound expressions expand correctly;
   it is evaluated twice, so it must be free of side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1367
/* Append the substring data[left:right] to `list`.

   Relies on names from the expanding function: `str` (scratch PyObject *),
   `list` (the result list) and an `onError` label that is jumped to when
   creating the string or appending it fails.  The reference created here
   is released after the append attempt in either case.

   The body is wrapped in braces, like SPLIT_ADD, so the macro expands to a
   single compound statement and stays intact when used as the body of an
   if/for/while. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);                                         \
}
1379
/* Add the substring data[left:right] as element number `count` of `list`,
   then increment `count`.

   Relies on names from the expanding function: `str`, `list`, `count` and
   an `onError` label.  While count is below MAX_PREALLOC the new string is
   stored straight into the preallocated slot with PyList_SET_ITEM();
   beyond that it is appended, and the local reference is released. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    else if (PyList_Append(list, str)) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else {                                                      \
        Py_DECREF(str);                                         \
    }                                                           \
    count++;                                                    \
}
Andrew Dalke525eab32006-05-26 14:00:45 +00001396
/* Always force the list to the expected size.  Uses the `count` variable
   of the expanding function. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Whitespace scanners.  `i` must be a modifiable index variable; it is
   advanced (SKIP_*) up to at most `len`, or moved back (RSKIP_*) down to
   at most -1, past a run of whitespace / non-whitespace bytes of `s`.
   Arguments are parenthesized so compound expressions expand correctly. */
#define SKIP_SPACE(s, i, len) \
    { while ((i) < (len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) \
    { while ((i) < (len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i) \
    { while ((i) >= 0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i) \
    { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1404
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001405Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001406split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407{
Andrew Dalke525eab32006-05-26 14:00:45 +00001408 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001409 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001410 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411
1412 if (list == NULL)
1413 return NULL;
1414
Andrew Dalke02758d62006-05-26 15:21:01 +00001415 i = j = 0;
1416
1417 while (maxsplit-- > 0) {
1418 SKIP_SPACE(s, i, len);
1419 if (i==len) break;
1420 j = i; i++;
1421 SKIP_NONSPACE(s, i, len);
1422 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001424
1425 if (i < len) {
1426 /* Only occurs when maxsplit was reached */
1427 /* Skip any remaining whitespace and copy to end of string */
1428 SKIP_SPACE(s, i, len);
1429 if (i != len)
1430 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001431 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001432 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001433 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001434 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435 Py_DECREF(list);
1436 return NULL;
1437}
1438
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001439Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001440split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001441{
Andrew Dalke525eab32006-05-26 14:00:45 +00001442 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001443 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001444 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001445
1446 if (list == NULL)
1447 return NULL;
1448
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001449 i = j = 0;
1450 while ((j < len) && (maxcount-- > 0)) {
1451 for(; j<len; j++) {
1452 /* I found that using memchr makes no difference */
1453 if (s[j] == ch) {
1454 SPLIT_ADD(s, i, j);
1455 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001456 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001457 }
1458 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001459 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001460 if (i <= len) {
1461 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001462 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001463 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001464 return list;
1465
1466 onError:
1467 Py_DECREF(list);
1468 return NULL;
1469}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001471PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001472"S.split([sep [,maxsplit]]) -> list of strings\n\
1473\n\
1474Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001475delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001476splits are done. If sep is not specified or is None, any\n\
1477whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478
1479static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001480string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001482 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001483 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001484 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001485 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001486#ifdef USE_FAST
1487 Py_ssize_t pos;
1488#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489
Martin v. Löwis9c830762006-04-13 08:37:17 +00001490 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001492 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001493 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001494 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001496 if (PyString_Check(subobj)) {
1497 sub = PyString_AS_STRING(subobj);
1498 n = PyString_GET_SIZE(subobj);
1499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001500#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001501 else if (PyUnicode_Check(subobj))
1502 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001503#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001504 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1505 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001506
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 if (n == 0) {
1508 PyErr_SetString(PyExc_ValueError, "empty separator");
1509 return NULL;
1510 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001511 else if (n == 1)
1512 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001513
Andrew Dalke525eab32006-05-26 14:00:45 +00001514 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515 if (list == NULL)
1516 return NULL;
1517
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001518#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001520 while (maxsplit-- > 0) {
1521 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1522 if (pos < 0)
1523 break;
1524 j = i+pos;
1525 SPLIT_ADD(s, i, j);
1526 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001528#else
1529 i = j = 0;
1530 while ((j+n <= len) && (maxsplit-- > 0)) {
1531 for (; j+n <= len; j++) {
1532 if (Py_STRING_MATCH(s, j, sub, n)) {
1533 SPLIT_ADD(s, i, j);
1534 i = j = j + n;
1535 break;
1536 }
1537 }
1538 }
1539#endif
1540 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001541 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542 return list;
1543
Andrew Dalke525eab32006-05-26 14:00:45 +00001544 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545 Py_DECREF(list);
1546 return NULL;
1547}
1548
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549PyDoc_STRVAR(partition__doc__,
1550"S.partition(sep) -> (head, sep, tail)\n\
1551\n\
1552Searches for the separator sep in S, and returns the part before it,\n\
1553the separator itself, and the part after it. If the separator is not\n\
1554found, returns S and two empty strings.");
1555
1556static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001557string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001558{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001559 const char *sep;
1560 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001561
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001562 if (PyString_Check(sep_obj)) {
1563 sep = PyString_AS_STRING(sep_obj);
1564 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001565 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001566#ifdef Py_USING_UNICODE
1567 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001568 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001569#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001570 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001571 return NULL;
1572
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001573 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001574 (PyObject*) self,
1575 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1576 sep_obj, sep, sep_len
1577 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001578}
1579
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001580PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001581"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001582\n\
1583Searches for the separator sep in S, starting at the end of S, and returns\n\
1584the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001585separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001586
1587static PyObject *
1588string_rpartition(PyStringObject *self, PyObject *sep_obj)
1589{
1590 const char *sep;
1591 Py_ssize_t sep_len;
1592
1593 if (PyString_Check(sep_obj)) {
1594 sep = PyString_AS_STRING(sep_obj);
1595 sep_len = PyString_GET_SIZE(sep_obj);
1596 }
1597#ifdef Py_USING_UNICODE
1598 else if (PyUnicode_Check(sep_obj))
1599 return PyUnicode_Partition((PyObject *) self, sep_obj);
1600#endif
1601 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1602 return NULL;
1603
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001604 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001605 (PyObject*) self,
1606 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1607 sep_obj, sep, sep_len
1608 );
1609}
1610
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001611Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001612rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001613{
Andrew Dalke525eab32006-05-26 14:00:45 +00001614 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001615 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001616 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001617
1618 if (list == NULL)
1619 return NULL;
1620
Andrew Dalke02758d62006-05-26 15:21:01 +00001621 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001622
Andrew Dalke02758d62006-05-26 15:21:01 +00001623 while (maxsplit-- > 0) {
1624 RSKIP_SPACE(s, i);
1625 if (i<0) break;
1626 j = i; i--;
1627 RSKIP_NONSPACE(s, i);
1628 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001629 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001630 if (i >= 0) {
1631 /* Only occurs when maxsplit was reached */
1632 /* Skip any remaining whitespace and copy to beginning of string */
1633 RSKIP_SPACE(s, i);
1634 if (i >= 0)
1635 SPLIT_ADD(s, 0, i + 1);
1636
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001637 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001638 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001639 if (PyList_Reverse(list) < 0)
1640 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001641 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001642 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001643 Py_DECREF(list);
1644 return NULL;
1645}
1646
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001647Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001648rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001649{
Andrew Dalke525eab32006-05-26 14:00:45 +00001650 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001651 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001652 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001653
1654 if (list == NULL)
1655 return NULL;
1656
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001657 i = j = len - 1;
1658 while ((i >= 0) && (maxcount-- > 0)) {
1659 for (; i >= 0; i--) {
1660 if (s[i] == ch) {
1661 SPLIT_ADD(s, i + 1, j + 1);
1662 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001663 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001664 }
1665 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001666 }
1667 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001668 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001669 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001670 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001671 if (PyList_Reverse(list) < 0)
1672 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001673 return list;
1674
1675 onError:
1676 Py_DECREF(list);
1677 return NULL;
1678}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679
1680PyDoc_STRVAR(rsplit__doc__,
1681"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1682\n\
1683Return a list of the words in the string S, using sep as the\n\
1684delimiter string, starting at the end of the string and working\n\
1685to the front. If maxsplit is given, at most maxsplit splits are\n\
1686done. If sep is not specified or is None, any whitespace string\n\
1687is a separator.");
1688
1689static PyObject *
1690string_rsplit(PyStringObject *self, PyObject *args)
1691{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001692 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001693 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001694 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001695 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001696
Martin v. Löwis9c830762006-04-13 08:37:17 +00001697 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001698 return NULL;
1699 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001700 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001701 if (subobj == Py_None)
1702 return rsplit_whitespace(s, len, maxsplit);
1703 if (PyString_Check(subobj)) {
1704 sub = PyString_AS_STRING(subobj);
1705 n = PyString_GET_SIZE(subobj);
1706 }
1707#ifdef Py_USING_UNICODE
1708 else if (PyUnicode_Check(subobj))
1709 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1710#endif
1711 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1712 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001713
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 if (n == 0) {
1715 PyErr_SetString(PyExc_ValueError, "empty separator");
1716 return NULL;
1717 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001718 else if (n == 1)
1719 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001720
Andrew Dalke525eab32006-05-26 14:00:45 +00001721 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 if (list == NULL)
1723 return NULL;
1724
1725 j = len;
1726 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001727
1728 while ( (i >= 0) && (maxsplit-- > 0) ) {
1729 for (; i>=0; i--) {
1730 if (Py_STRING_MATCH(s, i, sub, n)) {
1731 SPLIT_ADD(s, i + n, j);
1732 j = i;
1733 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001734 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001735 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001736 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001737 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001738 SPLIT_ADD(s, 0, j);
1739 FIX_PREALLOC_SIZE(list);
1740 if (PyList_Reverse(list) < 0)
1741 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001742 return list;
1743
Andrew Dalke525eab32006-05-26 14:00:45 +00001744onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001745 Py_DECREF(list);
1746 return NULL;
1747}
1748
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001750PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751"S.join(sequence) -> string\n\
1752\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001754sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755
1756static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001757string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758{
1759 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001760 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001763 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001764 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001765 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001766 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767
Tim Peters19fe14e2001-01-19 03:03:47 +00001768 seq = PySequence_Fast(orig, "");
1769 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001770 return NULL;
1771 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001772
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001773 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 if (seqlen == 0) {
1775 Py_DECREF(seq);
1776 return PyString_FromString("");
1777 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001779 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001780 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1781 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001782 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001783 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001784 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001786
Raymond Hettinger674f2412004-08-23 23:23:54 +00001787 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001788 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001789 * Do a pre-pass to figure out the total amount of space we'll
1790 * need (sz), see whether any argument is absurd, and defer to
1791 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001792 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001793 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001795 item = PySequence_Fast_GET_ITEM(seq, i);
1796 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001797#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001798 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001799 /* Defer to Unicode join.
1800 * CAUTION: There's no gurantee that the
1801 * original sequence can be iterated over
1802 * again, so we must pass seq here.
1803 */
1804 PyObject *result;
1805 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001806 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001807 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001808 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001809#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001810 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001811 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001812 " %.80s found",
Martin v. Löwis68192102007-07-21 06:55:02 +00001813 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001814 Py_DECREF(seq);
1815 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001816 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001817 sz += PyString_GET_SIZE(item);
1818 if (i != 0)
1819 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001820 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001821 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001822 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001823 Py_DECREF(seq);
1824 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001826 }
1827
1828 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001829 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001830 if (res == NULL) {
1831 Py_DECREF(seq);
1832 return NULL;
1833 }
1834
1835 /* Catenate everything. */
1836 p = PyString_AS_STRING(res);
1837 for (i = 0; i < seqlen; ++i) {
1838 size_t n;
1839 item = PySequence_Fast_GET_ITEM(seq, i);
1840 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001841 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001842 p += n;
1843 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001844 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001845 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001846 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001848
Jeremy Hylton49048292000-07-11 03:28:17 +00001849 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851}
1852
Tim Peters52e155e2001-06-16 05:42:57 +00001853PyObject *
1854_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001855{
Tim Petersa7259592001-06-16 05:11:17 +00001856 assert(sep != NULL && PyString_Check(sep));
1857 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001858 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001859}
1860
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001861Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001862string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001863{
1864 if (*end > len)
1865 *end = len;
1866 else if (*end < 0)
1867 *end += len;
1868 if (*end < 0)
1869 *end = 0;
1870 if (*start < 0)
1871 *start += len;
1872 if (*start < 0)
1873 *start = 0;
1874}
1875
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001876Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001877string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001880 const char *sub;
1881 Py_ssize_t sub_len;
1882 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Facundo Batista57d56692007-11-16 18:04:14 +00001883 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001884
Facundo Batista57d56692007-11-16 18:04:14 +00001885 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1886 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001887 return -2;
Facundo Batista57d56692007-11-16 18:04:14 +00001888 /* To support None in "start" and "end" arguments, meaning
1889 the same as if they were not passed.
1890 */
1891 if (obj_start != Py_None)
1892 if (!_PyEval_SliceIndex(obj_start, &start))
1893 return -2;
1894 if (obj_end != Py_None)
1895 if (!_PyEval_SliceIndex(obj_end, &end))
1896 return -2;
1897
Guido van Rossum4c08d552000-03-10 22:55:18 +00001898 if (PyString_Check(subobj)) {
1899 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001900 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001901 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001902#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001903 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001904 return PyUnicode_Find(
1905 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001906#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001907 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001908 /* XXX - the "expected a character buffer object" is pretty
1909 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910 return -2;
1911
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001912 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001913 return stringlib_find_slice(
1914 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1915 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001916 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001917 return stringlib_rfind_slice(
1918 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1919 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920}
1921
1922
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001923PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924"S.find(sub [,start [,end]]) -> int\n\
1925\n\
1926Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001927such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928arguments start and end are interpreted as in slice notation.\n\
1929\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001930Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931
1932static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001933string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001935 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936 if (result == -2)
1937 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001938 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939}
1940
1941
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001942PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943"S.index(sub [,start [,end]]) -> int\n\
1944\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001945Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946
1947static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001948string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001950 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951 if (result == -2)
1952 return NULL;
1953 if (result == -1) {
1954 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001955 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956 return NULL;
1957 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001958 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959}
1960
1961
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001962PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963"S.rfind(sub [,start [,end]]) -> int\n\
1964\n\
1965Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001966such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967arguments start and end are interpreted as in slice notation.\n\
1968\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001969Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970
1971static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001972string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975 if (result == -2)
1976 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001977 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978}
1979
1980
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001981PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982"S.rindex(sub [,start [,end]]) -> int\n\
1983\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001984Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985
1986static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001987string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001989 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990 if (result == -2)
1991 return NULL;
1992 if (result == -1) {
1993 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001994 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995 return NULL;
1996 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001997 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998}
1999
2000
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002001Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002002do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2003{
2004 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002005 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002006 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002007 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2008 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002009
2010 i = 0;
2011 if (striptype != RIGHTSTRIP) {
2012 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2013 i++;
2014 }
2015 }
2016
2017 j = len;
2018 if (striptype != LEFTSTRIP) {
2019 do {
2020 j--;
2021 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2022 j++;
2023 }
2024
2025 if (i == 0 && j == len && PyString_CheckExact(self)) {
2026 Py_INCREF(self);
2027 return (PyObject*)self;
2028 }
2029 else
2030 return PyString_FromStringAndSize(s+i, j-i);
2031}
2032
2033
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002034Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002035do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036{
2037 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002038 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040 i = 0;
2041 if (striptype != RIGHTSTRIP) {
2042 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2043 i++;
2044 }
2045 }
2046
2047 j = len;
2048 if (striptype != LEFTSTRIP) {
2049 do {
2050 j--;
2051 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2052 j++;
2053 }
2054
Tim Peters8fa5dd02001-09-12 02:18:30 +00002055 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002056 Py_INCREF(self);
2057 return (PyObject*)self;
2058 }
2059 else
2060 return PyString_FromStringAndSize(s+i, j-i);
2061}
2062
2063
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002064Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002065do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2066{
2067 PyObject *sep = NULL;
2068
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002069 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002070 return NULL;
2071
2072 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002073 if (PyString_Check(sep))
2074 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002075#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002076 else if (PyUnicode_Check(sep)) {
2077 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2078 PyObject *res;
2079 if (uniself==NULL)
2080 return NULL;
2081 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2082 striptype, sep);
2083 Py_DECREF(uniself);
2084 return res;
2085 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002086#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002087 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002088#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002089 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002090#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002091 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002092#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002093 STRIPNAME(striptype));
2094 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002095 }
2096
2097 return do_strip(self, striptype);
2098}
2099
2100
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002101PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002102"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103\n\
2104Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002105whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002106If chars is given and not None, remove characters in chars instead.\n\
2107If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108
2109static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002110string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002112 if (PyTuple_GET_SIZE(args) == 0)
2113 return do_strip(self, BOTHSTRIP); /* Common case */
2114 else
2115 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116}
2117
2118
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002119PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002120"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002122Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002123If chars is given and not None, remove characters in chars instead.\n\
2124If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125
2126static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002127string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002129 if (PyTuple_GET_SIZE(args) == 0)
2130 return do_strip(self, LEFTSTRIP); /* Common case */
2131 else
2132 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133}
2134
2135
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002136PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002137"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002139Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002140If chars is given and not None, remove characters in chars instead.\n\
2141If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142
2143static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002144string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002146 if (PyTuple_GET_SIZE(args) == 0)
2147 return do_strip(self, RIGHTSTRIP); /* Common case */
2148 else
2149 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150}
2151
2152
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002153PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154"S.lower() -> string\n\
2155\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002156Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002158/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2159#ifndef _tolower
2160#define _tolower tolower
2161#endif
2162
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002164string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002166 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002167 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002168 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002170 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002171 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002173
2174 s = PyString_AS_STRING(newobj);
2175
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002176 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002177
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002179 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002180 if (isupper(c))
2181 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002183
Anthony Baxtera6286212006-04-11 07:42:36 +00002184 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185}
2186
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002187PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188"S.upper() -> string\n\
2189\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002190Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002192#ifndef _toupper
2193#define _toupper toupper
2194#endif
2195
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002196static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002197string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002198{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002199 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002200 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002201 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002203 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002204 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002206
2207 s = PyString_AS_STRING(newobj);
2208
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002209 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002210
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002212 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002213 if (islower(c))
2214 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002215 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002216
Anthony Baxtera6286212006-04-11 07:42:36 +00002217 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218}
2219
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002220PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002221"S.title() -> string\n\
2222\n\
2223Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002224characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002225
2226static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002227string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228{
2229 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002230 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002231 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002232 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002233
Anthony Baxtera6286212006-04-11 07:42:36 +00002234 newobj = PyString_FromStringAndSize(NULL, n);
2235 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002237 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002238 for (i = 0; i < n; i++) {
2239 int c = Py_CHARMASK(*s++);
2240 if (islower(c)) {
2241 if (!previous_is_cased)
2242 c = toupper(c);
2243 previous_is_cased = 1;
2244 } else if (isupper(c)) {
2245 if (previous_is_cased)
2246 c = tolower(c);
2247 previous_is_cased = 1;
2248 } else
2249 previous_is_cased = 0;
2250 *s_new++ = c;
2251 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002252 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253}
2254
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002255PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256"S.capitalize() -> string\n\
2257\n\
2258Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002259capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260
2261static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002262string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263{
2264 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002265 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002266 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267
Anthony Baxtera6286212006-04-11 07:42:36 +00002268 newobj = PyString_FromStringAndSize(NULL, n);
2269 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002271 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002272 if (0 < n) {
2273 int c = Py_CHARMASK(*s++);
2274 if (islower(c))
2275 *s_new = toupper(c);
2276 else
2277 *s_new = c;
2278 s_new++;
2279 }
2280 for (i = 1; i < n; i++) {
2281 int c = Py_CHARMASK(*s++);
2282 if (isupper(c))
2283 *s_new = tolower(c);
2284 else
2285 *s_new = c;
2286 s_new++;
2287 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002288 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002289}
2290
2291
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002292PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002293"S.count(sub[, start[, end]]) -> int\n\
2294\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002295Return the number of non-overlapping occurrences of substring sub in\n\
2296string S[start:end]. Optional arguments start and end are interpreted\n\
2297as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298
2299static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002300string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002302 PyObject *sub_obj;
2303 const char *str = PyString_AS_STRING(self), *sub;
2304 Py_ssize_t sub_len;
2305 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002306
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002307 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2308 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002310
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002311 if (PyString_Check(sub_obj)) {
2312 sub = PyString_AS_STRING(sub_obj);
2313 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002314 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002315#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002316 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002317 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002318 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002319 if (count == -1)
2320 return NULL;
2321 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002322 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002323 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002324#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002325 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326 return NULL;
2327
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002328 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002329
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002330 return PyInt_FromSsize_t(
2331 stringlib_count(str + start, end - start, sub, sub_len)
2332 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333}
2334
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002335PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336"S.swapcase() -> string\n\
2337\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002338Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002339converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340
2341static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002342string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343{
2344 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002345 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002346 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347
Anthony Baxtera6286212006-04-11 07:42:36 +00002348 newobj = PyString_FromStringAndSize(NULL, n);
2349 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002351 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352 for (i = 0; i < n; i++) {
2353 int c = Py_CHARMASK(*s++);
2354 if (islower(c)) {
2355 *s_new = toupper(c);
2356 }
2357 else if (isupper(c)) {
2358 *s_new = tolower(c);
2359 }
2360 else
2361 *s_new = c;
2362 s_new++;
2363 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002364 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365}
2366
2367
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002368PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369"S.translate(table [,deletechars]) -> string\n\
2370\n\
2371Return a copy of the string S, where all characters occurring\n\
2372in the optional argument deletechars are removed, and the\n\
2373remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002374translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002375
2376static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002377string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002380 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002381 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002383 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002384 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385 PyObject *result;
2386 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002388
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002389 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002391 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392
2393 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002394 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002395 tablen = PyString_GET_SIZE(tableobj);
2396 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002397 else if (tableobj == Py_None) {
2398 table = NULL;
2399 tablen = 256;
2400 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002401#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002403 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404 parameter; instead a mapping to None will cause characters
2405 to be deleted. */
2406 if (delobj != NULL) {
2407 PyErr_SetString(PyExc_TypeError,
2408 "deletions are implemented differently for unicode");
2409 return NULL;
2410 }
2411 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2412 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002413#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002414 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002416
Martin v. Löwis00b61272002-12-12 20:03:19 +00002417 if (tablen != 256) {
2418 PyErr_SetString(PyExc_ValueError,
2419 "translation table must be 256 characters long");
2420 return NULL;
2421 }
2422
Guido van Rossum4c08d552000-03-10 22:55:18 +00002423 if (delobj != NULL) {
2424 if (PyString_Check(delobj)) {
2425 del_table = PyString_AS_STRING(delobj);
2426 dellen = PyString_GET_SIZE(delobj);
2427 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002428#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429 else if (PyUnicode_Check(delobj)) {
2430 PyErr_SetString(PyExc_TypeError,
2431 "deletions are implemented differently for unicode");
2432 return NULL;
2433 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002434#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2436 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002437 }
2438 else {
2439 del_table = NULL;
2440 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002441 }
2442
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002443 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444 result = PyString_FromStringAndSize((char *)NULL, inlen);
2445 if (result == NULL)
2446 return NULL;
2447 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002448 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002450 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002451 /* If no deletions are required, use faster code */
2452 for (i = inlen; --i >= 0; ) {
2453 c = Py_CHARMASK(*input++);
2454 if (Py_CHARMASK((*output++ = table[c])) != c)
2455 changed = 1;
2456 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002457 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002458 return result;
2459 Py_DECREF(result);
2460 Py_INCREF(input_obj);
2461 return input_obj;
2462 }
2463
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002464 if (table == NULL) {
2465 for (i = 0; i < 256; i++)
2466 trans_table[i] = Py_CHARMASK(i);
2467 } else {
2468 for (i = 0; i < 256; i++)
2469 trans_table[i] = Py_CHARMASK(table[i]);
2470 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002471
2472 for (i = 0; i < dellen; i++)
2473 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2474
2475 for (i = inlen; --i >= 0; ) {
2476 c = Py_CHARMASK(*input++);
2477 if (trans_table[c] != -1)
2478 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2479 continue;
2480 changed = 1;
2481 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002482 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483 Py_DECREF(result);
2484 Py_INCREF(input_obj);
2485 return input_obj;
2486 }
2487 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002488 if (inlen > 0)
2489 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002490 return result;
2491}
2492
2493
/* Direction values for the `direction` argument of the search helpers. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Evaluates to a char * pointing at the first byte equal to `c` within the
   first `target_len` bytes of `target`, or NULL when there is none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), c, target_len))
2501
2502/* String ops must return a string. */
2503/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002504Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002505return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002507 if (PyString_CheckExact(self)) {
2508 Py_INCREF(self);
2509 return self;
2510 }
2511 return (PyStringObject *)PyString_FromStringAndSize(
2512 PyString_AS_STRING(self),
2513 PyString_GET_SIZE(self));
2514}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002515
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002516Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002517countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002518{
2519 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002520 const char *start=target;
2521 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002522
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002523 while ( (start=findchar(start, end-start, c)) != NULL ) {
2524 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002525 if (count >= maxcount)
2526 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002527 start += 1;
2528 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002529 return count;
2530}
2531
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002532Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002533findstring(const char *target, Py_ssize_t target_len,
2534 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002535 Py_ssize_t start,
2536 Py_ssize_t end,
2537 int direction)
2538{
2539 if (start < 0) {
2540 start += target_len;
2541 if (start < 0)
2542 start = 0;
2543 }
2544 if (end > target_len) {
2545 end = target_len;
2546 } else if (end < 0) {
2547 end += target_len;
2548 if (end < 0)
2549 end = 0;
2550 }
2551
2552 /* zero-length substrings always match at the first attempt */
2553 if (pattern_len == 0)
2554 return (direction > 0) ? start : end;
2555
2556 end -= pattern_len;
2557
2558 if (direction < 0) {
2559 for (; end >= start; end--)
2560 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2561 return end;
2562 } else {
2563 for (; start <= end; start++)
2564 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2565 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566 }
2567 return -1;
2568}
2569
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002570Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002571countstring(const char *target, Py_ssize_t target_len,
2572 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002573 Py_ssize_t start,
2574 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002575 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002576{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002577 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002578
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002579 if (start < 0) {
2580 start += target_len;
2581 if (start < 0)
2582 start = 0;
2583 }
2584 if (end > target_len) {
2585 end = target_len;
2586 } else if (end < 0) {
2587 end += target_len;
2588 if (end < 0)
2589 end = 0;
2590 }
2591
2592 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002593 if (pattern_len == 0 || maxcount == 0) {
2594 if (target_len+1 < maxcount)
2595 return target_len+1;
2596 return maxcount;
2597 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002598
2599 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002600 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002601 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002602 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2603 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002604 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002605 end -= pattern_len-1;
2606 }
2607 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002608 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002609 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2610 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002611 if (--maxcount <= 0)
2612 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002613 start += pattern_len-1;
2614 }
2615 }
2616 return count;
2617}
2618
2619
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002620/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002621
2622/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002623Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002624replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002625 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002626 Py_ssize_t maxcount)
2627{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002628 char *self_s, *result_s;
2629 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002630 Py_ssize_t count, i, product;
2631 PyStringObject *result;
2632
2633 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002634
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002635 /* 1 at the end plus 1 after every character */
2636 count = self_len+1;
2637 if (maxcount < count)
2638 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002639
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002640 /* Check for overflow */
2641 /* result_len = count * to_len + self_len; */
2642 product = count * to_len;
2643 if (product / to_len != count) {
2644 PyErr_SetString(PyExc_OverflowError,
2645 "replace string is too long");
2646 return NULL;
2647 }
2648 result_len = product + self_len;
2649 if (result_len < 0) {
2650 PyErr_SetString(PyExc_OverflowError,
2651 "replace string is too long");
2652 return NULL;
2653 }
2654
2655 if (! (result = (PyStringObject *)
2656 PyString_FromStringAndSize(NULL, result_len)) )
2657 return NULL;
2658
2659 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002660 result_s = PyString_AS_STRING(result);
2661
2662 /* TODO: special case single character, which doesn't need memcpy */
2663
2664 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002665 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002666 result_s += to_len;
2667 count -= 1;
2668
2669 for (i=0; i<count; i++) {
2670 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002671 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002672 result_s += to_len;
2673 }
2674
2675 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002676 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002677
2678 return result;
2679}
2680
2681/* Special case for deleting a single character */
2682/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002683Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002684replace_delete_single_character(PyStringObject *self,
2685 char from_c, Py_ssize_t maxcount)
2686{
2687 char *self_s, *result_s;
2688 char *start, *next, *end;
2689 Py_ssize_t self_len, result_len;
2690 Py_ssize_t count;
2691 PyStringObject *result;
2692
2693 self_len = PyString_GET_SIZE(self);
2694 self_s = PyString_AS_STRING(self);
2695
Andrew Dalke51324072006-05-26 20:25:22 +00002696 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002697 if (count == 0) {
2698 return return_self(self);
2699 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002700
2701 result_len = self_len - count; /* from_len == 1 */
2702 assert(result_len>=0);
2703
2704 if ( (result = (PyStringObject *)
2705 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2706 return NULL;
2707 result_s = PyString_AS_STRING(result);
2708
2709 start = self_s;
2710 end = self_s + self_len;
2711 while (count-- > 0) {
2712 next = findchar(start, end-start, from_c);
2713 if (next == NULL)
2714 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002715 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002716 result_s += (next-start);
2717 start = next+1;
2718 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002719 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002720
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002721 return result;
2722}
2723
2724/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2725
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002726Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002727replace_delete_substring(PyStringObject *self,
2728 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002729 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002730 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002731 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002732 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002733 Py_ssize_t count, offset;
2734 PyStringObject *result;
2735
2736 self_len = PyString_GET_SIZE(self);
2737 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002738
2739 count = countstring(self_s, self_len,
2740 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002741 0, self_len, 1,
2742 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002743
2744 if (count == 0) {
2745 /* no matches */
2746 return return_self(self);
2747 }
2748
2749 result_len = self_len - (count * from_len);
2750 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002751
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002752 if ( (result = (PyStringObject *)
2753 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2754 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002755
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002756 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002757
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002758 start = self_s;
2759 end = self_s + self_len;
2760 while (count-- > 0) {
2761 offset = findstring(start, end-start,
2762 from_s, from_len,
2763 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002764 if (offset == -1)
2765 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002766 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002767
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002768 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002769
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002770 result_s += (next-start);
2771 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002772 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002773 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002774 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002775}
2776
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002777/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002778Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002779replace_single_character_in_place(PyStringObject *self,
2780 char from_c, char to_c,
2781 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002782{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783 char *self_s, *result_s, *start, *end, *next;
2784 Py_ssize_t self_len;
2785 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002786
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002787 /* The result string will be the same size */
2788 self_s = PyString_AS_STRING(self);
2789 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002790
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002791 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002792
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002793 if (next == NULL) {
2794 /* No matches; return the original string */
2795 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002796 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002797
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002798 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002799 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800 if (result == NULL)
2801 return NULL;
2802 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002803 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002804
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002805 /* change everything in-place, starting with this one */
2806 start = result_s + (next-self_s);
2807 *start = to_c;
2808 start++;
2809 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002810
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002811 while (--maxcount > 0) {
2812 next = findchar(start, end-start, from_c);
2813 if (next == NULL)
2814 break;
2815 *next = to_c;
2816 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002817 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002818
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002820}
2821
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002822/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002823Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002824replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002825 const char *from_s, Py_ssize_t from_len,
2826 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 Py_ssize_t maxcount)
2828{
2829 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002830 char *self_s;
2831 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002832 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002833
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002834 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002835
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836 self_s = PyString_AS_STRING(self);
2837 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002838
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002839 offset = findstring(self_s, self_len,
2840 from_s, from_len,
2841 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002842 if (offset == -1) {
2843 /* No matches; return the original string */
2844 return return_self(self);
2845 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002846
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002847 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002848 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002849 if (result == NULL)
2850 return NULL;
2851 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002852 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002853
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002854 /* change everything in-place, starting with this one */
2855 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002856 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002857 start += from_len;
2858 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002859
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002860 while ( --maxcount > 0) {
2861 offset = findstring(start, end-start,
2862 from_s, from_len,
2863 0, end-start, FORWARD);
2864 if (offset==-1)
2865 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002866 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867 start += offset+from_len;
2868 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002869
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002870 return result;
2871}
2872
2873/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002874Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002875replace_single_character(PyStringObject *self,
2876 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002877 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002878 Py_ssize_t maxcount)
2879{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002880 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002881 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002882 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002883 Py_ssize_t count, product;
2884 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002885
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002886 self_s = PyString_AS_STRING(self);
2887 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002888
Andrew Dalke51324072006-05-26 20:25:22 +00002889 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002890 if (count == 0) {
2891 /* no matches, return unchanged */
2892 return return_self(self);
2893 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002894
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002895 /* use the difference between current and new, hence the "-1" */
2896 /* result_len = self_len + count * (to_len-1) */
2897 product = count * (to_len-1);
2898 if (product / (to_len-1) != count) {
2899 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2900 return NULL;
2901 }
2902 result_len = self_len + product;
2903 if (result_len < 0) {
2904 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2905 return NULL;
2906 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002907
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908 if ( (result = (PyStringObject *)
2909 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2910 return NULL;
2911 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002912
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002913 start = self_s;
2914 end = self_s + self_len;
2915 while (count-- > 0) {
2916 next = findchar(start, end-start, from_c);
2917 if (next == NULL)
2918 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002919
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002920 if (next == start) {
2921 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002922 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002923 result_s += to_len;
2924 start += 1;
2925 } else {
2926 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002927 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002928 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002929 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002930 result_s += to_len;
2931 start = next+1;
2932 }
2933 }
2934 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002935 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002936
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002937 return result;
2938}
2939
2940/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002941Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002942replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002943 const char *from_s, Py_ssize_t from_len,
2944 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002945 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002946 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002947 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002948 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002949 Py_ssize_t count, offset, product;
2950 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002951
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002952 self_s = PyString_AS_STRING(self);
2953 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002954
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002955 count = countstring(self_s, self_len,
2956 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002957 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002958 if (count == 0) {
2959 /* no matches, return unchanged */
2960 return return_self(self);
2961 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002962
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002963 /* Check for overflow */
2964 /* result_len = self_len + count * (to_len-from_len) */
2965 product = count * (to_len-from_len);
2966 if (product / (to_len-from_len) != count) {
2967 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2968 return NULL;
2969 }
2970 result_len = self_len + product;
2971 if (result_len < 0) {
2972 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2973 return NULL;
2974 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002975
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002976 if ( (result = (PyStringObject *)
2977 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2978 return NULL;
2979 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002980
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002981 start = self_s;
2982 end = self_s + self_len;
2983 while (count-- > 0) {
2984 offset = findstring(start, end-start,
2985 from_s, from_len,
2986 0, end-start, FORWARD);
2987 if (offset == -1)
2988 break;
2989 next = start+offset;
2990 if (next == start) {
2991 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002992 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002993 result_s += to_len;
2994 start += from_len;
2995 } else {
2996 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002997 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002998 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002999 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003000 result_s += to_len;
3001 start = next+from_len;
3002 }
3003 }
3004 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003005 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00003006
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003007 return result;
3008}
3009
3010
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003011Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003012replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003013 const char *from_s, Py_ssize_t from_len,
3014 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003015 Py_ssize_t maxcount)
3016{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003017 if (maxcount < 0) {
3018 maxcount = PY_SSIZE_T_MAX;
3019 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3020 /* nothing to do; return the original string */
3021 return return_self(self);
3022 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003023
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003024 if (maxcount == 0 ||
3025 (from_len == 0 && to_len == 0)) {
3026 /* nothing to do; return the original string */
3027 return return_self(self);
3028 }
3029
3030 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003031
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003032 if (from_len == 0) {
3033 /* insert the 'to' string everywhere. */
3034 /* >>> "Python".replace("", ".") */
3035 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003036 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003037 }
3038
3039 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3040 /* point for an empty self string to generate a non-empty string */
3041 /* Special case so the remaining code always gets a non-empty string */
3042 if (PyString_GET_SIZE(self) == 0) {
3043 return return_self(self);
3044 }
3045
3046 if (to_len == 0) {
3047 /* delete all occurances of 'from' string */
3048 if (from_len == 1) {
3049 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003050 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003051 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003052 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003053 }
3054 }
3055
3056 /* Handle special case where both strings have the same length */
3057
3058 if (from_len == to_len) {
3059 if (from_len == 1) {
3060 return replace_single_character_in_place(
3061 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003062 from_s[0],
3063 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003064 maxcount);
3065 } else {
3066 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003067 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003068 }
3069 }
3070
3071 /* Otherwise use the more generic algorithms */
3072 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003073 return replace_single_character(self, from_s[0],
3074 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003075 } else {
3076 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003077 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003078 }
3079}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003080
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003081PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003082"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003083\n\
3084Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003085old replaced by new. If the optional argument count is\n\
3086given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003087
3088static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003089string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003090{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003091 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003092 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003093 const char *from_s, *to_s;
3094 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003095
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003096 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003097 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003098
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003099 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003100 from_s = PyString_AS_STRING(from);
3101 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003102 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003103#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003104 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003105 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003106 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003107#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003108 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003109 return NULL;
3110
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003111 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003112 to_s = PyString_AS_STRING(to);
3113 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003115#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003116 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003117 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003118 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003119#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003120 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003121 return NULL;
3122
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003123 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003124 from_s, from_len,
3125 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003126}
3127
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003128/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003129
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003130/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003131 * against substr, using the start and end arguments. Returns
3132 * -1 on error, 0 if not found and 1 if found.
3133 */
3134Py_LOCAL(int)
3135_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3136 Py_ssize_t end, int direction)
3137{
3138 Py_ssize_t len = PyString_GET_SIZE(self);
3139 Py_ssize_t slen;
3140 const char* sub;
3141 const char* str;
3142
3143 if (PyString_Check(substr)) {
3144 sub = PyString_AS_STRING(substr);
3145 slen = PyString_GET_SIZE(substr);
3146 }
3147#ifdef Py_USING_UNICODE
3148 else if (PyUnicode_Check(substr))
3149 return PyUnicode_Tailmatch((PyObject *)self,
3150 substr, start, end, direction);
3151#endif
3152 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3153 return -1;
3154 str = PyString_AS_STRING(self);
3155
3156 string_adjust_indices(&start, &end, len);
3157
3158 if (direction < 0) {
3159 /* startswith */
3160 if (start+slen > len)
3161 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003162 } else {
3163 /* endswith */
3164 if (end-start < slen || start > len)
3165 return 0;
3166
3167 if (end-slen > start)
3168 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003169 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003170 if (end-start >= slen)
3171 return ! memcmp(str+start, sub, slen);
3172 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003173}
3174
3175
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003176PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003177"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003178\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003179Return True if S starts with the specified prefix, False otherwise.\n\
3180With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003181With optional end, stop comparing S at that position.\n\
3182prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003183
3184static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003185string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003186{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003187 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003188 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003189 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003190 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003191
Guido van Rossumc6821402000-05-08 14:08:05 +00003192 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3193 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003194 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003195 if (PyTuple_Check(subobj)) {
3196 Py_ssize_t i;
3197 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3198 result = _string_tailmatch(self,
3199 PyTuple_GET_ITEM(subobj, i),
3200 start, end, -1);
3201 if (result == -1)
3202 return NULL;
3203 else if (result) {
3204 Py_RETURN_TRUE;
3205 }
3206 }
3207 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208 }
Georg Brandl24250812006-06-09 18:45:48 +00003209 result = _string_tailmatch(self, subobj, start, end, -1);
3210 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003211 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003212 else
Georg Brandl24250812006-06-09 18:45:48 +00003213 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003214}
3215
3216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003217PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003218"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003219\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003220Return True if S ends with the specified suffix, False otherwise.\n\
3221With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003222With optional end, stop comparing S at that position.\n\
3223suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003224
3225static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003226string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003227{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003228 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003229 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003230 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003231 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003232
Guido van Rossumc6821402000-05-08 14:08:05 +00003233 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3234 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003235 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003236 if (PyTuple_Check(subobj)) {
3237 Py_ssize_t i;
3238 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3239 result = _string_tailmatch(self,
3240 PyTuple_GET_ITEM(subobj, i),
3241 start, end, +1);
3242 if (result == -1)
3243 return NULL;
3244 else if (result) {
3245 Py_RETURN_TRUE;
3246 }
3247 }
3248 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003249 }
Georg Brandl24250812006-06-09 18:45:48 +00003250 result = _string_tailmatch(self, subobj, start, end, +1);
3251 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003252 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003253 else
Georg Brandl24250812006-06-09 18:45:48 +00003254 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003255}
3256
3257
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003258PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003259"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003260\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003261Encodes S using the codec registered for encoding. encoding defaults\n\
3262to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003263handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003264a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3265'xmlcharrefreplace' as well as any other name registered with\n\
3266codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003267
3268static PyObject *
3269string_encode(PyStringObject *self, PyObject *args)
3270{
3271 char *encoding = NULL;
3272 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003273 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003274
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003275 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3276 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003277 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003278 if (v == NULL)
3279 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003280 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3281 PyErr_Format(PyExc_TypeError,
3282 "encoder did not return a string/unicode object "
3283 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003284 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003285 Py_DECREF(v);
3286 return NULL;
3287 }
3288 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003289
3290 onError:
3291 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003292}
3293
3294
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003295PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003296"S.decode([encoding[,errors]]) -> object\n\
3297\n\
3298Decodes S using the codec registered for encoding. encoding defaults\n\
3299to the default encoding. errors may be given to set a different error\n\
3300handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003301a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3302as well as any other name registerd with codecs.register_error that is\n\
3303able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003304
3305static PyObject *
3306string_decode(PyStringObject *self, PyObject *args)
3307{
3308 char *encoding = NULL;
3309 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003310 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003311
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003312 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3313 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003314 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003315 if (v == NULL)
3316 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003317 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3318 PyErr_Format(PyExc_TypeError,
3319 "decoder did not return a string/unicode object "
3320 "(type=%.400s)",
Martin v. Löwis68192102007-07-21 06:55:02 +00003321 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003322 Py_DECREF(v);
3323 return NULL;
3324 }
3325 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003326
3327 onError:
3328 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003329}
3330
3331
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003332PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003333"S.expandtabs([tabsize]) -> string\n\
3334\n\
3335Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003336If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003337
3338static PyObject*
3339string_expandtabs(PyStringObject *self, PyObject *args)
3340{
3341 const char *e, *p;
3342 char *q;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003343 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003344 PyObject *u;
3345 int tabsize = 8;
3346
3347 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3348 return NULL;
3349
Thomas Wouters7e474022000-07-16 12:04:32 +00003350 /* First pass: determine size of output string */
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003351 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003352 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3353 for (p = PyString_AS_STRING(self); p < e; p++)
3354 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003355 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003356 j += tabsize - (j % tabsize);
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003357 if (old_j > j) {
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003358 PyErr_SetString(PyExc_OverflowError,
3359 "new string is too long");
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003360 return NULL;
3361 }
3362 old_j = j;
3363 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003364 }
3365 else {
3366 j++;
3367 if (*p == '\n' || *p == '\r') {
3368 i += j;
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003369 old_j = j = 0;
3370 if (i < 0) {
3371 PyErr_SetString(PyExc_OverflowError,
3372 "new string is too long");
3373 return NULL;
3374 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003375 }
3376 }
3377
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003378 if ((i + j) < 0) {
3379 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3380 return NULL;
3381 }
3382
Guido van Rossum4c08d552000-03-10 22:55:18 +00003383 /* Second pass: create output string and fill it */
3384 u = PyString_FromStringAndSize(NULL, i + j);
3385 if (!u)
3386 return NULL;
3387
3388 j = 0;
3389 q = PyString_AS_STRING(u);
3390
3391 for (p = PyString_AS_STRING(self); p < e; p++)
3392 if (*p == '\t') {
3393 if (tabsize > 0) {
3394 i = tabsize - (j % tabsize);
3395 j += i;
3396 while (i--)
3397 *q++ = ' ';
3398 }
3399 }
3400 else {
3401 j++;
3402 *q++ = *p;
3403 if (*p == '\n' || *p == '\r')
3404 j = 0;
3405 }
3406
3407 return u;
3408}
3409
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003410Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003411pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412{
3413 PyObject *u;
3414
3415 if (left < 0)
3416 left = 0;
3417 if (right < 0)
3418 right = 0;
3419
Tim Peters8fa5dd02001-09-12 02:18:30 +00003420 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003421 Py_INCREF(self);
3422 return (PyObject *)self;
3423 }
3424
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003425 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003426 left + PyString_GET_SIZE(self) + right);
3427 if (u) {
3428 if (left)
3429 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003430 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003431 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003432 PyString_GET_SIZE(self));
3433 if (right)
3434 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3435 fill, right);
3436 }
3437
3438 return u;
3439}
3440
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003441PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003442"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003443"\n"
3444"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003445"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003446
3447static PyObject *
3448string_ljust(PyStringObject *self, PyObject *args)
3449{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003450 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003451 char fillchar = ' ';
3452
Thomas Wouters4abb3662006-04-19 14:50:15 +00003453 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003454 return NULL;
3455
Tim Peters8fa5dd02001-09-12 02:18:30 +00003456 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003457 Py_INCREF(self);
3458 return (PyObject*) self;
3459 }
3460
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003461 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003462}
3463
3464
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003465PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003466"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003467"\n"
3468"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003469"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003470
3471static PyObject *
3472string_rjust(PyStringObject *self, PyObject *args)
3473{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003474 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003475 char fillchar = ' ';
3476
Thomas Wouters4abb3662006-04-19 14:50:15 +00003477 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003478 return NULL;
3479
Tim Peters8fa5dd02001-09-12 02:18:30 +00003480 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003481 Py_INCREF(self);
3482 return (PyObject*) self;
3483 }
3484
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003485 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003486}
3487
3488
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003489PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003490"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003491"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003492"Return S centered in a string of length width. Padding is\n"
3493"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003494
3495static PyObject *
3496string_center(PyStringObject *self, PyObject *args)
3497{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003498 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003499 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003500 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003501
Thomas Wouters4abb3662006-04-19 14:50:15 +00003502 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003503 return NULL;
3504
Tim Peters8fa5dd02001-09-12 02:18:30 +00003505 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003506 Py_INCREF(self);
3507 return (PyObject*) self;
3508 }
3509
3510 marg = width - PyString_GET_SIZE(self);
3511 left = marg / 2 + (marg & width & 1);
3512
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003513 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003514}
3515
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003516PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003517"S.zfill(width) -> string\n"
3518"\n"
3519"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003520"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003521
3522static PyObject *
3523string_zfill(PyStringObject *self, PyObject *args)
3524{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003525 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003526 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003527 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003528 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003529
Thomas Wouters4abb3662006-04-19 14:50:15 +00003530 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003531 return NULL;
3532
3533 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003534 if (PyString_CheckExact(self)) {
3535 Py_INCREF(self);
3536 return (PyObject*) self;
3537 }
3538 else
3539 return PyString_FromStringAndSize(
3540 PyString_AS_STRING(self),
3541 PyString_GET_SIZE(self)
3542 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003543 }
3544
3545 fill = width - PyString_GET_SIZE(self);
3546
3547 s = pad(self, fill, 0, '0');
3548
3549 if (s == NULL)
3550 return NULL;
3551
3552 p = PyString_AS_STRING(s);
3553 if (p[fill] == '+' || p[fill] == '-') {
3554 /* move sign to beginning of string */
3555 p[0] = p[fill];
3556 p[fill] = '0';
3557 }
3558
3559 return (PyObject*) s;
3560}
3561
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003562PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003563"S.isspace() -> bool\n\
3564\n\
3565Return True if all characters in S are whitespace\n\
3566and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003567
3568static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003569string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003570{
Fred Drakeba096332000-07-09 07:04:36 +00003571 register const unsigned char *p
3572 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003573 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003574
Guido van Rossum4c08d552000-03-10 22:55:18 +00003575 /* Shortcut for single character strings */
3576 if (PyString_GET_SIZE(self) == 1 &&
3577 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003578 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003579
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003580 /* Special case for empty strings */
3581 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003582 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003583
Guido van Rossum4c08d552000-03-10 22:55:18 +00003584 e = p + PyString_GET_SIZE(self);
3585 for (; p < e; p++) {
3586 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003587 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003588 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003589 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003590}
3591
3592
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003593PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003594"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003595\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003596Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003597and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003598
3599static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003600string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003601{
Fred Drakeba096332000-07-09 07:04:36 +00003602 register const unsigned char *p
3603 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003604 register const unsigned char *e;
3605
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003606 /* Shortcut for single character strings */
3607 if (PyString_GET_SIZE(self) == 1 &&
3608 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003609 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003610
3611 /* Special case for empty strings */
3612 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003613 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003614
3615 e = p + PyString_GET_SIZE(self);
3616 for (; p < e; p++) {
3617 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003618 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003619 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003620 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003621}
3622
3623
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003624PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003625"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003626\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003627Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003628and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003629
3630static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003631string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003632{
Fred Drakeba096332000-07-09 07:04:36 +00003633 register const unsigned char *p
3634 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003635 register const unsigned char *e;
3636
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003637 /* Shortcut for single character strings */
3638 if (PyString_GET_SIZE(self) == 1 &&
3639 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003640 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003641
3642 /* Special case for empty strings */
3643 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003645
3646 e = p + PyString_GET_SIZE(self);
3647 for (; p < e; p++) {
3648 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003649 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003650 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003651 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003652}
3653
3654
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003655PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003656"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003658Return True if all characters in S are digits\n\
3659and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660
3661static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003662string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663{
Fred Drakeba096332000-07-09 07:04:36 +00003664 register const unsigned char *p
3665 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003666 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 /* Shortcut for single character strings */
3669 if (PyString_GET_SIZE(self) == 1 &&
3670 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003671 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003673 /* Special case for empty strings */
3674 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003675 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003676
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677 e = p + PyString_GET_SIZE(self);
3678 for (; p < e; p++) {
3679 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003680 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003682 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683}
3684
3685
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003686PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003687"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003688\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003689Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003690at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691
3692static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003693string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003694{
Fred Drakeba096332000-07-09 07:04:36 +00003695 register const unsigned char *p
3696 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003697 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698 int cased;
3699
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700 /* Shortcut for single character strings */
3701 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003702 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003704 /* Special case for empty strings */
3705 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003706 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003707
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708 e = p + PyString_GET_SIZE(self);
3709 cased = 0;
3710 for (; p < e; p++) {
3711 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003712 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003713 else if (!cased && islower(*p))
3714 cased = 1;
3715 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003716 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717}
3718
3719
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003720PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003721"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003723Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003724at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725
3726static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003727string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728{
Fred Drakeba096332000-07-09 07:04:36 +00003729 register const unsigned char *p
3730 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003731 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003732 int cased;
3733
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734 /* Shortcut for single character strings */
3735 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003736 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003738 /* Special case for empty strings */
3739 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003740 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003741
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742 e = p + PyString_GET_SIZE(self);
3743 cased = 0;
3744 for (; p < e; p++) {
3745 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003746 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747 else if (!cased && isupper(*p))
3748 cased = 1;
3749 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003750 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751}
3752
3753
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003754PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003755"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003757Return True if S is a titlecased string and there is at least one\n\
3758character in S, i.e. uppercase characters may only follow uncased\n\
3759characters and lowercase characters only cased ones. Return False\n\
3760otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761
3762static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003763string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003764{
Fred Drakeba096332000-07-09 07:04:36 +00003765 register const unsigned char *p
3766 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003767 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003768 int cased, previous_is_cased;
3769
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770 /* Shortcut for single character strings */
3771 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003772 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003774 /* Special case for empty strings */
3775 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003776 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003777
Guido van Rossum4c08d552000-03-10 22:55:18 +00003778 e = p + PyString_GET_SIZE(self);
3779 cased = 0;
3780 previous_is_cased = 0;
3781 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003782 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783
3784 if (isupper(ch)) {
3785 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003786 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787 previous_is_cased = 1;
3788 cased = 1;
3789 }
3790 else if (islower(ch)) {
3791 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003792 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003793 previous_is_cased = 1;
3794 cased = 1;
3795 }
3796 else
3797 previous_is_cased = 0;
3798 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003799 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800}
3801
3802
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003803PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003804"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003805\n\
3806Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003807Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003808is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810static PyObject*
3811string_splitlines(PyStringObject *self, PyObject *args)
3812{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003813 register Py_ssize_t i;
3814 register Py_ssize_t j;
3815 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003816 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003817 PyObject *list;
3818 PyObject *str;
3819 char *data;
3820
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003821 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003822 return NULL;
3823
3824 data = PyString_AS_STRING(self);
3825 len = PyString_GET_SIZE(self);
3826
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003827 /* This does not use the preallocated list because splitlines is
3828 usually run with hundreds of newlines. The overhead of
3829 switching between PyList_SET_ITEM and append causes about a
3830 2-3% slowdown for that common case. A smarter implementation
3831 could move the if check out, so the SET_ITEMs are done first
3832 and the appends only done when the prealloc buffer is full.
3833 That's too much work for little gain.*/
3834
Guido van Rossum4c08d552000-03-10 22:55:18 +00003835 list = PyList_New(0);
3836 if (!list)
3837 goto onError;
3838
3839 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003840 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003841
Guido van Rossum4c08d552000-03-10 22:55:18 +00003842 /* Find a line and append it */
3843 while (i < len && data[i] != '\n' && data[i] != '\r')
3844 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003845
3846 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003847 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003848 if (i < len) {
3849 if (data[i] == '\r' && i + 1 < len &&
3850 data[i+1] == '\n')
3851 i += 2;
3852 else
3853 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003854 if (keepends)
3855 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003856 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003857 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003858 j = i;
3859 }
3860 if (j < len) {
3861 SPLIT_APPEND(data, j, len);
3862 }
3863
3864 return list;
3865
3866 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003867 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003868 return NULL;
3869}
3870
3871#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003872#undef SPLIT_ADD
3873#undef MAX_PREALLOC
3874#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003875
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003876static PyObject *
3877string_getnewargs(PyStringObject *v)
3878{
Martin v. Löwis68192102007-07-21 06:55:02 +00003879 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003880}
3881
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003882
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003883static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003884string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003885 /* Counterparts of the obsolete stropmodule functions; except
3886 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003887 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3888 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003889 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003890 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3891 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003892 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3893 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3894 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3895 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3896 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3897 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3898 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003899 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3900 capitalize__doc__},
3901 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3902 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3903 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003904 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003905 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3906 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3907 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3908 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3909 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3910 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3911 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003912 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3913 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003914 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3915 startswith__doc__},
3916 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3917 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3918 swapcase__doc__},
3919 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3920 translate__doc__},
3921 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3922 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3923 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3924 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3925 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3926 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3927 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3928 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3929 expandtabs__doc__},
3930 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3931 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003932 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003933 {NULL, NULL} /* sentinel */
3934};
3935
Jeremy Hylton938ace62002-07-17 16:30:39 +00003936static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003937str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3938
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003939static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003940string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003941{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003942 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003943 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003944
Guido van Rossumae960af2001-08-30 03:11:59 +00003945 if (type != &PyString_Type)
3946 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003947 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3948 return NULL;
3949 if (x == NULL)
3950 return PyString_FromString("");
3951 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003952}
3953
Guido van Rossumae960af2001-08-30 03:11:59 +00003954static PyObject *
3955str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3956{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003957 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003958 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003959
3960 assert(PyType_IsSubtype(type, &PyString_Type));
3961 tmp = string_new(&PyString_Type, args, kwds);
3962 if (tmp == NULL)
3963 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003964 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003965 n = PyString_GET_SIZE(tmp);
3966 pnew = type->tp_alloc(type, n);
3967 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003968 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003969 ((PyStringObject *)pnew)->ob_shash =
3970 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003971 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003972 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003973 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003974 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003975}
3976
Guido van Rossumcacfc072002-05-24 19:01:59 +00003977static PyObject *
3978basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3979{
3980 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003981 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003982 return NULL;
3983}
3984
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003985static PyObject *
3986string_mod(PyObject *v, PyObject *w)
3987{
3988 if (!PyString_Check(v)) {
3989 Py_INCREF(Py_NotImplemented);
3990 return Py_NotImplemented;
3991 }
3992 return PyString_Format(v, w);
3993}
3994
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003995PyDoc_STRVAR(basestring_doc,
3996"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003997
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003998static PyNumberMethods string_as_number = {
3999 0, /*nb_add*/
4000 0, /*nb_subtract*/
4001 0, /*nb_multiply*/
4002 0, /*nb_divide*/
4003 string_mod, /*nb_remainder*/
4004};
4005
4006
Guido van Rossumcacfc072002-05-24 19:01:59 +00004007PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004008 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004009 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004010 0,
4011 0,
4012 0, /* tp_dealloc */
4013 0, /* tp_print */
4014 0, /* tp_getattr */
4015 0, /* tp_setattr */
4016 0, /* tp_compare */
4017 0, /* tp_repr */
4018 0, /* tp_as_number */
4019 0, /* tp_as_sequence */
4020 0, /* tp_as_mapping */
4021 0, /* tp_hash */
4022 0, /* tp_call */
4023 0, /* tp_str */
4024 0, /* tp_getattro */
4025 0, /* tp_setattro */
4026 0, /* tp_as_buffer */
4027 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4028 basestring_doc, /* tp_doc */
4029 0, /* tp_traverse */
4030 0, /* tp_clear */
4031 0, /* tp_richcompare */
4032 0, /* tp_weaklistoffset */
4033 0, /* tp_iter */
4034 0, /* tp_iternext */
4035 0, /* tp_methods */
4036 0, /* tp_members */
4037 0, /* tp_getset */
4038 &PyBaseObject_Type, /* tp_base */
4039 0, /* tp_dict */
4040 0, /* tp_descr_get */
4041 0, /* tp_descr_set */
4042 0, /* tp_dictoffset */
4043 0, /* tp_init */
4044 0, /* tp_alloc */
4045 basestring_new, /* tp_new */
4046 0, /* tp_free */
4047};
4048
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004049PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004050"str(object) -> string\n\
4051\n\
4052Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004053If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004054
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004055PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004056 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004057 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004058 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004059 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004060 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004061 (printfunc)string_print, /* tp_print */
4062 0, /* tp_getattr */
4063 0, /* tp_setattr */
4064 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004065 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004066 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004067 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004068 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004069 (hashfunc)string_hash, /* tp_hash */
4070 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004071 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004072 PyObject_GenericGetAttr, /* tp_getattro */
4073 0, /* tp_setattro */
4074 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004075 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neal Norwitzee3a1b52007-02-25 19:44:48 +00004076 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004077 string_doc, /* tp_doc */
4078 0, /* tp_traverse */
4079 0, /* tp_clear */
4080 (richcmpfunc)string_richcompare, /* tp_richcompare */
4081 0, /* tp_weaklistoffset */
4082 0, /* tp_iter */
4083 0, /* tp_iternext */
4084 string_methods, /* tp_methods */
4085 0, /* tp_members */
4086 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004087 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004088 0, /* tp_dict */
4089 0, /* tp_descr_get */
4090 0, /* tp_descr_set */
4091 0, /* tp_dictoffset */
4092 0, /* tp_init */
4093 0, /* tp_alloc */
4094 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004095 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004096};
4097
4098void
Fred Drakeba096332000-07-09 07:04:36 +00004099PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004100{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004101 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004102 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004103 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004104 if (w == NULL || !PyString_Check(*pv)) {
4105 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004106 *pv = NULL;
4107 return;
4108 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004109 v = string_concat((PyStringObject *) *pv, w);
4110 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004111 *pv = v;
4112}
4113
Guido van Rossum013142a1994-08-30 08:19:36 +00004114void
Fred Drakeba096332000-07-09 07:04:36 +00004115PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004116{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004117 PyString_Concat(pv, w);
4118 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004119}
4120
4121
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004122/* The following function breaks the notion that strings are immutable:
4123 it changes the size of a string. We get away with this only if there
4124 is only one module referencing the object. You can also think of it
4125 as creating a new string object and destroying the old one, only
4126 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004127 already be known to some other part of the code...
4128 Note that if there's not enough memory to resize the string, the original
4129 string object at *pv is deallocated, *pv is set to NULL, an "out of
4130 memory" exception is set, and -1 is returned. Else (on success) 0 is
4131 returned, and the value in *pv may or may not be the same as on input.
4132 As always, an extra byte is allocated for a trailing \0 byte (newsize
4133 does *not* include that), and a trailing \0 byte is stored.
4134*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004135
4136int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004137_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004138{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004139 register PyObject *v;
4140 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004141 v = *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004142 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004143 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004144 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004145 Py_DECREF(v);
4146 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004147 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004148 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004149 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004150 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004151 _Py_ForgetReference(v);
4152 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004153 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004154 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004155 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004156 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004157 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004158 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004159 _Py_NewReference(*pv);
4160 sv = (PyStringObject *) *pv;
Martin v. Löwis68192102007-07-21 06:55:02 +00004161 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004162 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004163 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004164 return 0;
4165}
Guido van Rossume5372401993-03-16 12:15:04 +00004166
4167/* Helpers for formatstring */
4168
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004169Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004170getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004171{
Thomas Wouters977485d2006-02-16 15:59:12 +00004172 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004173 if (argidx < arglen) {
4174 (*p_argidx)++;
4175 if (arglen < 0)
4176 return args;
4177 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004178 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004179 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004180 PyErr_SetString(PyExc_TypeError,
4181 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004182 return NULL;
4183}
4184
/* Bit flags recording which flag characters appeared in a format
 * specifier.  Each flag occupies its own bit so they can be OR'ed
 * together in a single int.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */

4197
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004198Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004199formatfloat(char *buf, size_t buflen, int flags,
4200 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004201{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004202 /* fmt = '%#.' + `prec` + `type`
4203 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004204 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004205 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004206 x = PyFloat_AsDouble(v);
4207 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004208 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis68192102007-07-21 06:55:02 +00004209 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004210 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004211 }
Guido van Rossume5372401993-03-16 12:15:04 +00004212 if (prec < 0)
4213 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004214 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4215 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004216 /* Worst case length calc to ensure no buffer overrun:
4217
4218 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004219 fmt = %#.<prec>g
4220 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004221 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004222 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004223
4224 'f' formats:
4225 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4226 len = 1 + 50 + 1 + prec = 52 + prec
4227
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004228 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004229 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004230
4231 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004232 if (((type == 'g' || type == 'G') &&
4233 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004234 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004235 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004236 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004237 return -1;
4238 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004239 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4240 (flags&F_ALT) ? "#" : "",
4241 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004242 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004243 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004244}
4245
Tim Peters38fd5b62000-09-21 05:43:11 +00004246/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4247 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4248 * Python's regular ints.
4249 * Return value: a new PyString*, or NULL if error.
4250 * . *pbuf is set to point into it,
4251 * *plen set to the # of chars following that.
4252 * Caller must decref it when done using pbuf.
4253 * The string starting at *pbuf is of the form
4254 * "-"? ("0x" | "0X")? digit+
4255 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004256 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004257 * There will be at least prec digits, zero-filled on the left if
4258 * necessary to get that many.
4259 * val object to be converted
4260 * flags bitmask of format flags; only F_ALT is looked at
4261 * prec minimum number of digits; 0-fill on left if needed
4262 * type a character in [duoxX]; u acts the same as d
4263 *
4264 * CAUTION: o, x and X conversions on regular ints can never
4265 * produce a '-' sign, but can for Python's unbounded ints.
4266 */
4267PyObject*
4268_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4269 char **pbuf, int *plen)
4270{
4271 PyObject *result = NULL;
4272 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004273 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004274 int sign; /* 1 if '-', else 0 */
4275 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004276 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004277 int numdigits; /* len == numnondigits + numdigits */
4278 int numnondigits = 0;
4279
4280 switch (type) {
4281 case 'd':
4282 case 'u':
Martin v. Löwis68192102007-07-21 06:55:02 +00004283 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004284 break;
4285 case 'o':
Martin v. Löwis68192102007-07-21 06:55:02 +00004286 result = Py_Type(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004287 break;
4288 case 'x':
4289 case 'X':
4290 numnondigits = 2;
Martin v. Löwis68192102007-07-21 06:55:02 +00004291 result = Py_Type(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004292 break;
4293 default:
4294 assert(!"'type' not in [duoxX]");
4295 }
4296 if (!result)
4297 return NULL;
4298
Neal Norwitz56423e52006-08-13 18:11:08 +00004299 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004300 if (!buf) {
4301 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004302 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004303 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004304
Tim Peters38fd5b62000-09-21 05:43:11 +00004305 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis68192102007-07-21 06:55:02 +00004306 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004307 PyErr_BadInternalCall();
4308 return NULL;
4309 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004310 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004311 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004312 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4313 return NULL;
4314 }
4315 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004316 if (buf[len-1] == 'L') {
4317 --len;
4318 buf[len] = '\0';
4319 }
4320 sign = buf[0] == '-';
4321 numnondigits += sign;
4322 numdigits = len - numnondigits;
4323 assert(numdigits > 0);
4324
Tim Petersfff53252001-04-12 18:38:48 +00004325 /* Get rid of base marker unless F_ALT */
4326 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004327 /* Need to skip 0x, 0X or 0. */
4328 int skipped = 0;
4329 switch (type) {
4330 case 'o':
4331 assert(buf[sign] == '0');
4332 /* If 0 is only digit, leave it alone. */
4333 if (numdigits > 1) {
4334 skipped = 1;
4335 --numdigits;
4336 }
4337 break;
4338 case 'x':
4339 case 'X':
4340 assert(buf[sign] == '0');
4341 assert(buf[sign + 1] == 'x');
4342 skipped = 2;
4343 numnondigits -= 2;
4344 break;
4345 }
4346 if (skipped) {
4347 buf += skipped;
4348 len -= skipped;
4349 if (sign)
4350 buf[0] = '-';
4351 }
4352 assert(len == numnondigits + numdigits);
4353 assert(numdigits > 0);
4354 }
4355
4356 /* Fill with leading zeroes to meet minimum width. */
4357 if (prec > numdigits) {
4358 PyObject *r1 = PyString_FromStringAndSize(NULL,
4359 numnondigits + prec);
4360 char *b1;
4361 if (!r1) {
4362 Py_DECREF(result);
4363 return NULL;
4364 }
4365 b1 = PyString_AS_STRING(r1);
4366 for (i = 0; i < numnondigits; ++i)
4367 *b1++ = *buf++;
4368 for (i = 0; i < prec - numdigits; i++)
4369 *b1++ = '0';
4370 for (i = 0; i < numdigits; i++)
4371 *b1++ = *buf++;
4372 *b1 = '\0';
4373 Py_DECREF(result);
4374 result = r1;
4375 buf = PyString_AS_STRING(result);
4376 len = numnondigits + prec;
4377 }
4378
4379 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004380 if (type == 'X') {
4381 /* Need to convert all lower case letters to upper case.
4382 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004383 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004384 if (buf[i] >= 'a' && buf[i] <= 'x')
4385 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004386 }
4387 *pbuf = buf;
4388 *plen = len;
4389 return result;
4390}
4391
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004392Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004393formatint(char *buf, size_t buflen, int flags,
4394 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004395{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004396 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004397 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4398 + 1 + 1 = 24 */
4399 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004400 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004401 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004402
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004403 x = PyInt_AsLong(v);
4404 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004405 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis68192102007-07-21 06:55:02 +00004406 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004407 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004408 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004409 if (x < 0 && type == 'u') {
4410 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004411 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004412 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4413 sign = "-";
4414 else
4415 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004416 if (prec < 0)
4417 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004418
4419 if ((flags & F_ALT) &&
4420 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004421 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004422 * of issues that cause pain:
4423 * - when 0 is being converted, the C standard leaves off
4424 * the '0x' or '0X', which is inconsistent with other
4425 * %#x/%#X conversions and inconsistent with Python's
4426 * hex() function
4427 * - there are platforms that violate the standard and
4428 * convert 0 with the '0x' or '0X'
4429 * (Metrowerks, Compaq Tru64)
4430 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004431 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004432 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004433 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004434 * We can achieve the desired consistency by inserting our
4435 * own '0x' or '0X' prefix, and substituting %x/%X in place
4436 * of %#x/%#X.
4437 *
4438 * Note that this is the same approach as used in
4439 * formatint() in unicodeobject.c
4440 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004441 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4442 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004443 }
4444 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004445 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4446 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004447 prec, type);
4448 }
4449
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004450 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4451 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004452 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004453 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004454 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004455 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004456 return -1;
4457 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004458 if (sign[0])
4459 PyOS_snprintf(buf, buflen, fmt, -x);
4460 else
4461 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004462 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004463}
4464
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004465Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004466formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004467{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004468 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004469 if (PyString_Check(v)) {
4470 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004471 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004472 }
4473 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004474 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004475 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004476 }
4477 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004478 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004479}
4480
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004490
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004491PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004492PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004493{
4494 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004495 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004496 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004497 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004498 PyObject *result, *orig_args;
4499#ifdef Py_USING_UNICODE
4500 PyObject *v, *w;
4501#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004502 PyObject *dict = NULL;
4503 if (format == NULL || !PyString_Check(format) || args == NULL) {
4504 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004505 return NULL;
4506 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004507 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004508 fmt = PyString_AS_STRING(format);
4509 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004510 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004511 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004512 if (result == NULL)
4513 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004514 res = PyString_AsString(result);
4515 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004516 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004517 argidx = 0;
4518 }
4519 else {
4520 arglen = -1;
4521 argidx = -2;
4522 }
Martin v. Löwis68192102007-07-21 06:55:02 +00004523 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004524 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004525 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004526 while (--fmtcnt >= 0) {
4527 if (*fmt != '%') {
4528 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004529 rescnt = fmtcnt + 100;
4530 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004531 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004532 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004533 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004534 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004535 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004536 }
4537 *res++ = *fmt++;
4538 }
4539 else {
4540 /* Got a format specifier */
4541 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004542 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004543 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004544 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004545 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004546 PyObject *v = NULL;
4547 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004548 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004549 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004550 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004551 char formatbuf[FORMATBUFLEN];
4552 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004553#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004554 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004555 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004556#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004557
Guido van Rossumda9c2711996-12-05 21:58:58 +00004558 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004559 if (*fmt == '(') {
4560 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004561 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004562 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004563 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004564
4565 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004566 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004567 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004568 goto error;
4569 }
4570 ++fmt;
4571 --fmtcnt;
4572 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004573 /* Skip over balanced parentheses */
4574 while (pcount > 0 && --fmtcnt >= 0) {
4575 if (*fmt == ')')
4576 --pcount;
4577 else if (*fmt == '(')
4578 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004579 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004580 }
4581 keylen = fmt - keystart - 1;
4582 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004583 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004584 "incomplete format key");
4585 goto error;
4586 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004587 key = PyString_FromStringAndSize(keystart,
4588 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004589 if (key == NULL)
4590 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004591 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004592 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004593 args_owned = 0;
4594 }
4595 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004596 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004597 if (args == NULL) {
4598 goto error;
4599 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004600 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004601 arglen = -1;
4602 argidx = -2;
4603 }
Guido van Rossume5372401993-03-16 12:15:04 +00004604 while (--fmtcnt >= 0) {
4605 switch (c = *fmt++) {
4606 case '-': flags |= F_LJUST; continue;
4607 case '+': flags |= F_SIGN; continue;
4608 case ' ': flags |= F_BLANK; continue;
4609 case '#': flags |= F_ALT; continue;
4610 case '0': flags |= F_ZERO; continue;
4611 }
4612 break;
4613 }
4614 if (c == '*') {
4615 v = getnextarg(args, arglen, &argidx);
4616 if (v == NULL)
4617 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004618 if (!PyInt_Check(v)) {
4619 PyErr_SetString(PyExc_TypeError,
4620 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004621 goto error;
4622 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004623 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004624 if (width < 0) {
4625 flags |= F_LJUST;
4626 width = -width;
4627 }
Guido van Rossume5372401993-03-16 12:15:04 +00004628 if (--fmtcnt >= 0)
4629 c = *fmt++;
4630 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004631 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004632 width = c - '0';
4633 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004634 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004635 if (!isdigit(c))
4636 break;
4637 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004638 PyErr_SetString(
4639 PyExc_ValueError,
4640 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004641 goto error;
4642 }
4643 width = width*10 + (c - '0');
4644 }
4645 }
4646 if (c == '.') {
4647 prec = 0;
4648 if (--fmtcnt >= 0)
4649 c = *fmt++;
4650 if (c == '*') {
4651 v = getnextarg(args, arglen, &argidx);
4652 if (v == NULL)
4653 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004654 if (!PyInt_Check(v)) {
4655 PyErr_SetString(
4656 PyExc_TypeError,
4657 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004658 goto error;
4659 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004660 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004661 if (prec < 0)
4662 prec = 0;
4663 if (--fmtcnt >= 0)
4664 c = *fmt++;
4665 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004666 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004667 prec = c - '0';
4668 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004669 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004670 if (!isdigit(c))
4671 break;
4672 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004673 PyErr_SetString(
4674 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004675 "prec too big");
4676 goto error;
4677 }
4678 prec = prec*10 + (c - '0');
4679 }
4680 }
4681 } /* prec */
4682 if (fmtcnt >= 0) {
4683 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004684 if (--fmtcnt >= 0)
4685 c = *fmt++;
4686 }
4687 }
4688 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004689 PyErr_SetString(PyExc_ValueError,
4690 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004691 goto error;
4692 }
4693 if (c != '%') {
4694 v = getnextarg(args, arglen, &argidx);
4695 if (v == NULL)
4696 goto error;
4697 }
4698 sign = 0;
4699 fill = ' ';
4700 switch (c) {
4701 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004702 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004703 len = 1;
4704 break;
4705 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004706#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004707 if (PyUnicode_Check(v)) {
4708 fmt = fmt_start;
4709 argidx = argidx_start;
4710 goto unicode;
4711 }
Georg Brandld45014b2005-10-01 17:06:00 +00004712#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004713 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004714#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004715 if (temp != NULL && PyUnicode_Check(temp)) {
4716 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004717 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004718 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004719 goto unicode;
4720 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004721#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004722 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004723 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004724 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004725 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004726 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004727 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004728 if (!PyString_Check(temp)) {
4729 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004730 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004731 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004732 goto error;
4733 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004734 pbuf = PyString_AS_STRING(temp);
4735 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004736 if (prec >= 0 && len > prec)
4737 len = prec;
4738 break;
4739 case 'i':
4740 case 'd':
4741 case 'u':
4742 case 'o':
4743 case 'x':
4744 case 'X':
4745 if (c == 'i')
4746 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004747 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004748 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004749 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004750 prec, c, &pbuf, &ilen);
4751 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004752 if (!temp)
4753 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004754 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004755 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004756 else {
4757 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004758 len = formatint(pbuf,
4759 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004760 flags, prec, c, v);
4761 if (len < 0)
4762 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004763 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004764 }
4765 if (flags & F_ZERO)
4766 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004767 break;
4768 case 'e':
4769 case 'E':
4770 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004771 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004772 case 'g':
4773 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004774 if (c == 'F')
4775 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004776 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004777 len = formatfloat(pbuf, sizeof(formatbuf),
4778 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004779 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004780 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004781 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004782 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004783 fill = '0';
4784 break;
4785 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004786#ifdef Py_USING_UNICODE
4787 if (PyUnicode_Check(v)) {
4788 fmt = fmt_start;
4789 argidx = argidx_start;
4790 goto unicode;
4791 }
4792#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004793 pbuf = formatbuf;
4794 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004795 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004796 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004797 break;
4798 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004799 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004800 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004801 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004802 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004803 (Py_ssize_t)(fmt - 1 -
4804 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004805 goto error;
4806 }
4807 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004808 if (*pbuf == '-' || *pbuf == '+') {
4809 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004810 len--;
4811 }
4812 else if (flags & F_SIGN)
4813 sign = '+';
4814 else if (flags & F_BLANK)
4815 sign = ' ';
4816 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004817 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004818 }
4819 if (width < len)
4820 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004821 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004822 reslen -= rescnt;
4823 rescnt = width + fmtcnt + 100;
4824 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004825 if (reslen < 0) {
4826 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004827 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004828 return PyErr_NoMemory();
4829 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004830 if (_PyString_Resize(&result, reslen) < 0) {
4831 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004832 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004833 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004834 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004835 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004836 }
4837 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004838 if (fill != ' ')
4839 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004840 rescnt--;
4841 if (width > len)
4842 width--;
4843 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004844 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4845 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004846 assert(pbuf[1] == c);
4847 if (fill != ' ') {
4848 *res++ = *pbuf++;
4849 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004850 }
Tim Petersfff53252001-04-12 18:38:48 +00004851 rescnt -= 2;
4852 width -= 2;
4853 if (width < 0)
4854 width = 0;
4855 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004856 }
4857 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004858 do {
4859 --rescnt;
4860 *res++ = fill;
4861 } while (--width > len);
4862 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004863 if (fill == ' ') {
4864 if (sign)
4865 *res++ = sign;
4866 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004867 (c == 'x' || c == 'X')) {
4868 assert(pbuf[0] == '0');
4869 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004870 *res++ = *pbuf++;
4871 *res++ = *pbuf++;
4872 }
4873 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004874 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004875 res += len;
4876 rescnt -= len;
4877 while (--width >= len) {
4878 --rescnt;
4879 *res++ = ' ';
4880 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004881 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004882 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004883 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004884 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004885 goto error;
4886 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004887 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004888 } /* '%' */
4889 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004890 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004891 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004892 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004893 goto error;
4894 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004895 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004896 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004897 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004898 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004899 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004900
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004901#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004902 unicode:
4903 if (args_owned) {
4904 Py_DECREF(args);
4905 args_owned = 0;
4906 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004907 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004908 if (PyTuple_Check(orig_args) && argidx > 0) {
4909 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004910 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004911 v = PyTuple_New(n);
4912 if (v == NULL)
4913 goto error;
4914 while (--n >= 0) {
4915 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4916 Py_INCREF(w);
4917 PyTuple_SET_ITEM(v, n, w);
4918 }
4919 args = v;
4920 } else {
4921 Py_INCREF(orig_args);
4922 args = orig_args;
4923 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004924 args_owned = 1;
4925 /* Take what we have of the result and let the Unicode formatting
4926 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004927 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004928 if (_PyString_Resize(&result, rescnt))
4929 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004930 fmtcnt = PyString_GET_SIZE(format) - \
4931 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004932 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4933 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004934 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004935 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004936 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004937 if (v == NULL)
4938 goto error;
4939 /* Paste what we have (result) to what the Unicode formatting
4940 function returned (v) and return the result (or error) */
4941 w = PyUnicode_Concat(result, v);
4942 Py_DECREF(result);
4943 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004944 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004945 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004946#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004947
Guido van Rossume5372401993-03-16 12:15:04 +00004948 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004949 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004950 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004951 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004952 }
Guido van Rossume5372401993-03-16 12:15:04 +00004953 return NULL;
4954}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004955
Guido van Rossum2a61e741997-01-18 07:55:05 +00004956void
Fred Drakeba096332000-07-09 07:04:36 +00004957PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004958{
4959 register PyStringObject *s = (PyStringObject *)(*p);
4960 PyObject *t;
4961 if (s == NULL || !PyString_Check(s))
4962 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004963 /* If it's a string subclass, we don't really know what putting
4964 it in the interned dict might do. */
4965 if (!PyString_CheckExact(s))
4966 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004967 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004968 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004969 if (interned == NULL) {
4970 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004971 if (interned == NULL) {
4972 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004973 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004974 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004975 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004976 t = PyDict_GetItem(interned, (PyObject *)s);
4977 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004978 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004979 Py_DECREF(*p);
4980 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004981 return;
4982 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004983
Armin Rigo79f7ad22004-08-07 19:27:39 +00004984 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004985 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004986 return;
4987 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004988 /* The two references in interned are not counted by refcnt.
4989 The string deallocator will take care of this */
Martin v. Löwis68192102007-07-21 06:55:02 +00004990 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004991 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004992}
4993
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004994void
4995PyString_InternImmortal(PyObject **p)
4996{
4997 PyString_InternInPlace(p);
4998 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4999 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5000 Py_INCREF(*p);
5001 }
5002}
5003
Guido van Rossum2a61e741997-01-18 07:55:05 +00005004
5005PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005006PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005007{
5008 PyObject *s = PyString_FromString(cp);
5009 if (s == NULL)
5010 return NULL;
5011 PyString_InternInPlace(&s);
5012 return s;
5013}
5014
Guido van Rossum8cf04761997-08-02 02:57:45 +00005015void
Fred Drakeba096332000-07-09 07:04:36 +00005016PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005017{
5018 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005019 for (i = 0; i < UCHAR_MAX + 1; i++) {
5020 Py_XDECREF(characters[i]);
5021 characters[i] = NULL;
5022 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005023 Py_XDECREF(nullstring);
5024 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005025}
Barry Warsawa903ad982001-02-23 16:40:48 +00005026
Barry Warsawa903ad982001-02-23 16:40:48 +00005027void _Py_ReleaseInternedStrings(void)
5028{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005029 PyObject *keys;
5030 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005031 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005032 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005033
5034 if (interned == NULL || !PyDict_Check(interned))
5035 return;
5036 keys = PyDict_Keys(interned);
5037 if (keys == NULL || !PyList_Check(keys)) {
5038 PyErr_Clear();
5039 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005040 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005041
5042 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5043 detector, interned strings are not forcibly deallocated; rather, we
5044 give them their stolen references back, and then clear and DECREF
5045 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005046
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005047 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005048 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5049 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005050 for (i = 0; i < n; i++) {
5051 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5052 switch (s->ob_sstate) {
5053 case SSTATE_NOT_INTERNED:
5054 /* XXX Shouldn't happen */
5055 break;
5056 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005057 Py_Refcnt(s) += 1;
5058 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005059 break;
5060 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis68192102007-07-21 06:55:02 +00005061 Py_Refcnt(s) += 2;
5062 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005063 break;
5064 default:
5065 Py_FatalError("Inconsistent interned string state.");
5066 }
5067 s->ob_sstate = SSTATE_NOT_INTERNED;
5068 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005069 fprintf(stderr, "total size of all interned strings: "
5070 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5071 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005072 Py_DECREF(keys);
5073 PyDict_Clear(interned);
5074 Py_DECREF(interned);
5075 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005076}