blob: ded1907fcaa1667ed02a65d222879694af8aca13 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
26
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000054PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000055{
Tim Peters9e897f42001-05-09 07:37:07 +000056 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000057 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058 if (size == 0 && (op = nullstring) != NULL) {
59#ifdef COUNT_ALLOCS
60 null_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 if (size == 1 && str != NULL &&
66 (op = characters[*str & UCHAR_MAX]) != NULL)
67 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068#ifdef COUNT_ALLOCS
69 one_strings++;
70#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 Py_INCREF(op);
72 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000075 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000076 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000077 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000078 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000079 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000080 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000081 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (str != NULL)
83 memcpy(op->ob_sval, str, size);
84 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000085 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000086 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000087 PyObject *t = (PyObject *)op;
88 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000089 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000093 PyObject *t = (PyObject *)op;
94 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000095 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000103PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104{
Tim Peters62de65b2001-12-06 20:29:32 +0000105 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000106 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000107
108 assert(str != NULL);
109 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000110 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000111 PyErr_SetString(PyExc_OverflowError,
112 "string is too long for a Python string");
113 return NULL;
114 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 if (size == 0 && (op = nullstring) != NULL) {
116#ifdef COUNT_ALLOCS
117 null_strings++;
118#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000119 Py_INCREF(op);
120 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
123#ifdef COUNT_ALLOCS
124 one_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000130 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000131 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000136 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000137 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000138 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000146 PyObject *t = (PyObject *)op;
147 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000148 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
165 memcpy(count, vargs, sizeof(va_list));
166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
177 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
178 ;
179
Tim Peters8931ff12006-05-13 23:28:20 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 }
254 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
255 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000275 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 memcpy(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
355
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 const char *encoding,
359 const char *errors)
360{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361 PyObject *v, *str;
362
363 str = PyString_FromStringAndSize(s, size);
364 if (str == NULL)
365 return NULL;
366 v = PyString_AsDecodedString(str, encoding, errors);
367 Py_DECREF(str);
368 return v;
369}
370
371PyObject *PyString_AsDecodedObject(PyObject *str,
372 const char *encoding,
373 const char *errors)
374{
375 PyObject *v;
376
377 if (!PyString_Check(str)) {
378 PyErr_BadArgument();
379 goto onError;
380 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382 if (encoding == NULL) {
383#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385#else
386 PyErr_SetString(PyExc_ValueError, "no encoding specified");
387 goto onError;
388#endif
389 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000390
391 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 v = PyCodec_Decode(str, encoding, errors);
393 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000394 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395
396 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000397
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 return NULL;
400}
401
402PyObject *PyString_AsDecodedString(PyObject *str,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v;
407
408 v = PyString_AsDecodedObject(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000412#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000413 /* Convert Unicode to a string using the default encoding */
414 if (PyUnicode_Check(v)) {
415 PyObject *temp = v;
416 v = PyUnicode_AsEncodedString(v, NULL, NULL);
417 Py_DECREF(temp);
418 if (v == NULL)
419 goto onError;
420 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000421#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000422 if (!PyString_Check(v)) {
423 PyErr_Format(PyExc_TypeError,
424 "decoder did not return a string object (type=%.400s)",
425 v->ob_type->tp_name);
426 Py_DECREF(v);
427 goto onError;
428 }
429
430 return v;
431
432 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 return NULL;
434}
435
436PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000437 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 const char *encoding,
439 const char *errors)
440{
441 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000442
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 str = PyString_FromStringAndSize(s, size);
444 if (str == NULL)
445 return NULL;
446 v = PyString_AsEncodedString(str, encoding, errors);
447 Py_DECREF(str);
448 return v;
449}
450
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452 const char *encoding,
453 const char *errors)
454{
455 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000456
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 if (!PyString_Check(str)) {
458 PyErr_BadArgument();
459 goto onError;
460 }
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462 if (encoding == NULL) {
463#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000464 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465#else
466 PyErr_SetString(PyExc_ValueError, "no encoding specified");
467 goto onError;
468#endif
469 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470
471 /* Encode via the codec registry */
472 v = PyCodec_Encode(str, encoding, errors);
473 if (v == NULL)
474 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
476 return v;
477
478 onError:
479 return NULL;
480}
481
482PyObject *PyString_AsEncodedString(PyObject *str,
483 const char *encoding,
484 const char *errors)
485{
486 PyObject *v;
487
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000488 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000489 if (v == NULL)
490 goto onError;
491
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000492#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 /* Convert Unicode to a string using the default encoding */
494 if (PyUnicode_Check(v)) {
495 PyObject *temp = v;
496 v = PyUnicode_AsEncodedString(v, NULL, NULL);
497 Py_DECREF(temp);
498 if (v == NULL)
499 goto onError;
500 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000501#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000502 if (!PyString_Check(v)) {
503 PyErr_Format(PyExc_TypeError,
504 "encoder did not return a string object (type=%.400s)",
505 v->ob_type->tp_name);
506 Py_DECREF(v);
507 goto onError;
508 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000511
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000512 onError:
513 return NULL;
514}
515
Guido van Rossum234f9421993-06-17 12:35:49 +0000516static void
Fred Drakeba096332000-07-09 07:04:36 +0000517string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000518{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000519 switch (PyString_CHECK_INTERNED(op)) {
520 case SSTATE_NOT_INTERNED:
521 break;
522
523 case SSTATE_INTERNED_MORTAL:
524 /* revive dead object temporarily for DelItem */
525 op->ob_refcnt = 3;
526 if (PyDict_DelItem(interned, op) != 0)
527 Py_FatalError(
528 "deletion of interned string failed");
529 break;
530
531 case SSTATE_INTERNED_IMMORTAL:
532 Py_FatalError("Immortal interned string died.");
533
534 default:
535 Py_FatalError("Inconsistent interned string state.");
536 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000537 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000538}
539
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540/* Unescape a backslash-escaped string. If unicode is non-zero,
541 the string is a u-literal. If recode_encoding is non-zero,
542 the string is UTF-8 encoded and should be re-encoded in the
543 specified encoding. */
544
545PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000548 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 const char *recode_encoding)
550{
551 int c;
552 char *p, *buf;
553 const char *end;
554 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000556 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000557 if (v == NULL)
558 return NULL;
559 p = buf = PyString_AsString(v);
560 end = s + len;
561 while (s < end) {
562 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000563 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000564#ifdef Py_USING_UNICODE
565 if (recode_encoding && (*s & 0x80)) {
566 PyObject *u, *w;
567 char *r;
568 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000570 t = s;
571 /* Decode non-ASCII bytes as UTF-8. */
572 while (t < end && (*t & 0x80)) t++;
573 u = PyUnicode_DecodeUTF8(s, t - s, errors);
574 if(!u) goto failed;
575
576 /* Recode them in target encoding. */
577 w = PyUnicode_AsEncodedString(
578 u, recode_encoding, errors);
579 Py_DECREF(u);
580 if (!w) goto failed;
581
582 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000583 assert(PyString_Check(w));
584 r = PyString_AS_STRING(w);
585 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 memcpy(p, r, rn);
587 p += rn;
588 Py_DECREF(w);
589 s = t;
590 } else {
591 *p++ = *s++;
592 }
593#else
594 *p++ = *s++;
595#endif
596 continue;
597 }
598 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000599 if (s==end) {
600 PyErr_SetString(PyExc_ValueError,
601 "Trailing \\ in string");
602 goto failed;
603 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000604 switch (*s++) {
605 /* XXX This assumes ASCII! */
606 case '\n': break;
607 case '\\': *p++ = '\\'; break;
608 case '\'': *p++ = '\''; break;
609 case '\"': *p++ = '\"'; break;
610 case 'b': *p++ = '\b'; break;
611 case 'f': *p++ = '\014'; break; /* FF */
612 case 't': *p++ = '\t'; break;
613 case 'n': *p++ = '\n'; break;
614 case 'r': *p++ = '\r'; break;
615 case 'v': *p++ = '\013'; break; /* VT */
616 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
617 case '0': case '1': case '2': case '3':
618 case '4': case '5': case '6': case '7':
619 c = s[-1] - '0';
620 if ('0' <= *s && *s <= '7') {
621 c = (c<<3) + *s++ - '0';
622 if ('0' <= *s && *s <= '7')
623 c = (c<<3) + *s++ - '0';
624 }
625 *p++ = c;
626 break;
627 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000628 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 && isxdigit(Py_CHARMASK(s[1]))) {
630 unsigned int x = 0;
631 c = Py_CHARMASK(*s);
632 s++;
633 if (isdigit(c))
634 x = c - '0';
635 else if (islower(c))
636 x = 10 + c - 'a';
637 else
638 x = 10 + c - 'A';
639 x = x << 4;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x += c - '0';
644 else if (islower(c))
645 x += 10 + c - 'a';
646 else
647 x += 10 + c - 'A';
648 *p++ = x;
649 break;
650 }
651 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656 if (strcmp(errors, "replace") == 0) {
657 *p++ = '?';
658 } else if (strcmp(errors, "ignore") == 0)
659 /* do nothing */;
660 else {
661 PyErr_Format(PyExc_ValueError,
662 "decoding error; "
663 "unknown error handling code: %.400s",
664 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667#ifndef Py_USING_UNICODE
668 case 'u':
669 case 'U':
670 case 'N':
671 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000672 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 "Unicode escapes not legal "
674 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000675 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677#endif
678 default:
679 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000680 s--;
681 goto non_esc; /* an arbitry number of unescaped
682 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000685 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 return v;
688 failed:
689 Py_DECREF(v);
690 return NULL;
691}
692
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694string_getsize(register PyObject *op)
695{
696 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (PyString_AsStringAndSize(op, &s, &len))
699 return -1;
700 return len;
701}
702
703static /*const*/ char *
704string_getbuffer(register PyObject *op)
705{
706 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000707 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000708 if (PyString_AsStringAndSize(op, &s, &len))
709 return NULL;
710 return s;
711}
712
Martin v. Löwis18e16552006-02-15 17:27:45 +0000713Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000714PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (!PyString_Check(op))
717 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000718 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719}
720
721/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000722PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 if (!PyString_Check(op))
725 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000726 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727}
728
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729int
730PyString_AsStringAndSize(register PyObject *obj,
731 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000732 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733{
734 if (s == NULL) {
735 PyErr_BadInternalCall();
736 return -1;
737 }
738
739 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000740#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 if (PyUnicode_Check(obj)) {
742 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
743 if (obj == NULL)
744 return -1;
745 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000746 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000747#endif
748 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000749 PyErr_Format(PyExc_TypeError,
750 "expected string or Unicode object, "
751 "%.200s found", obj->ob_type->tp_name);
752 return -1;
753 }
754 }
755
756 *s = PyString_AS_STRING(obj);
757 if (len != NULL)
758 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000759 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_SetString(PyExc_TypeError,
761 "expected string without null bytes");
762 return -1;
763 }
764 return 0;
765}
766
Fredrik Lundhaf722372006-05-25 17:55:31 +0000767/* -------------------------------------------------------------------- */
Fredrik Lundha50d2012006-05-26 17:04:58 +0000768/* stringlib components */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769
Fredrik Lundha50d2012006-05-26 17:04:58 +0000770#define STRINGLIB_CHAR char
Fredrik Lundhb9479482006-05-26 17:22:38 +0000771#define STRINGLIB_NEW PyString_FromStringAndSize
772#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
Fredrik Lundha50d2012006-05-26 17:04:58 +0000774#include "stringlib/fastsearch.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000775#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000776
Fredrik Lundhaf722372006-05-25 17:55:31 +0000777/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000778/* Methods */
779
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000780static int
Fred Drakeba096332000-07-09 07:04:36 +0000781string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000783 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000784 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000785 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000786
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000787 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000788 if (! PyString_CheckExact(op)) {
789 int ret;
790 /* A str subclass may have its own __str__ method. */
791 op = (PyStringObject *) PyObject_Str((PyObject *)op);
792 if (op == NULL)
793 return -1;
794 ret = string_print(op, fp, flags);
795 Py_DECREF(op);
796 return ret;
797 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000798 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000799#ifdef __VMS
800 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
801#else
802 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
803#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000804 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000805 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000806
Thomas Wouters7e474022000-07-16 12:04:32 +0000807 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000808 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000809 if (memchr(op->ob_sval, '\'', op->ob_size) &&
810 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000811 quote = '"';
812
813 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814 for (i = 0; i < op->ob_size; i++) {
815 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000817 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000818 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000819 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000820 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000821 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000822 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000823 fprintf(fp, "\\r");
824 else if (c < ' ' || c >= 0x7f)
825 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000826 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000827 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000829 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000830 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831}
832
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000833PyObject *
834PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000836 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000837 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000838 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000839 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000840 PyErr_SetString(PyExc_OverflowError,
841 "string is too large to make repr");
842 }
843 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000845 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000846 }
847 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000848 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 register char c;
850 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000851 int quote;
852
Thomas Wouters7e474022000-07-16 12:04:32 +0000853 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000854 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000855 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000856 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000857 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000858 quote = '"';
859
Tim Peters9161c8b2001-12-03 01:55:38 +0000860 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000861 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000862 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000863 /* There's at least enough room for a hex escape
864 and a closing quote. */
865 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000867 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000868 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000869 else if (c == '\t')
870 *p++ = '\\', *p++ = 't';
871 else if (c == '\n')
872 *p++ = '\\', *p++ = 'n';
873 else if (c == '\r')
874 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000875 else if (c < ' ' || c >= 0x7f) {
876 /* For performance, we don't want to call
877 PyOS_snprintf here (extra layers of
878 function call). */
879 sprintf(p, "\\x%02x", c & 0xff);
880 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000881 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000882 else
883 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000885 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000886 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000888 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000889 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000890 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000891 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892}
893
Guido van Rossum189f1df2001-05-01 16:51:53 +0000894static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000895string_repr(PyObject *op)
896{
897 return PyString_Repr(op, 1);
898}
899
900static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000901string_str(PyObject *s)
902{
Tim Petersc9933152001-10-16 20:18:24 +0000903 assert(PyString_Check(s));
904 if (PyString_CheckExact(s)) {
905 Py_INCREF(s);
906 return s;
907 }
908 else {
909 /* Subtype -- return genuine string with the same value. */
910 PyStringObject *t = (PyStringObject *) s;
911 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
912 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000913}
914
Martin v. Löwis18e16552006-02-15 17:27:45 +0000915static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000916string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917{
918 return a->ob_size;
919}
920
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000922string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923{
Andrew Dalke598710c2006-05-25 18:18:39 +0000924 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925 register PyStringObject *op;
926 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000927#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000928 if (PyUnicode_Check(bb))
929 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000930#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000931 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000932 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000933 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934 return NULL;
935 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000936#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000938 if ((a->ob_size == 0 || b->ob_size == 0) &&
939 PyString_CheckExact(a) && PyString_CheckExact(b)) {
940 if (a->ob_size == 0) {
941 Py_INCREF(bb);
942 return bb;
943 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944 Py_INCREF(a);
945 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 }
947 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000948 if (size < 0) {
949 PyErr_SetString(PyExc_OverflowError,
950 "strings are too large to concat");
951 return NULL;
952 }
953
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000954 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000955 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000956 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000958 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000960 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000961 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
962 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000963 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965#undef b
966}
967
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000968static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000969string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000970{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000971 register Py_ssize_t i;
972 register Py_ssize_t j;
973 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000974 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000975 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000976 if (n < 0)
977 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000978 /* watch out for overflows: the size can overflow int,
979 * and the # of bytes needed can overflow size_t
980 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000981 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000982 if (n && size / n != a->ob_size) {
983 PyErr_SetString(PyExc_OverflowError,
984 "repeated string is too long");
985 return NULL;
986 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000987 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988 Py_INCREF(a);
989 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990 }
Tim Peterse7c05322004-06-27 17:24:49 +0000991 nbytes = (size_t)size;
992 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000993 PyErr_SetString(PyExc_OverflowError,
994 "repeated string is too long");
995 return NULL;
996 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000998 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000999 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001000 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001001 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001002 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001003 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001004 op->ob_sval[size] = '\0';
1005 if (a->ob_size == 1 && n > 0) {
1006 memset(op->ob_sval, a->ob_sval[0] , n);
1007 return (PyObject *) op;
1008 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001009 i = 0;
1010 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001011 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1012 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001013 }
1014 while (i < size) {
1015 j = (i <= size-i) ? i : size-i;
1016 memcpy(op->ob_sval+i, op->ob_sval, j);
1017 i += j;
1018 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001020}
1021
1022/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1023
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001024static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001025string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001026 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001027 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028{
1029 if (i < 0)
1030 i = 0;
1031 if (j < 0)
1032 j = 0; /* Avoid signed/unsigned bug in next line */
1033 if (j > a->ob_size)
1034 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001035 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1036 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001037 Py_INCREF(a);
1038 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001039 }
1040 if (j < i)
1041 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001042 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001043}
1044
Guido van Rossum9284a572000-03-07 15:53:43 +00001045static int
Fred Drakeba096332000-07-09 07:04:36 +00001046string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001047{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001048 char *s = PyString_AS_STRING(a);
1049 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001050 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001051 Py_ssize_t pos;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001052
1053 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001054#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001055 if (PyUnicode_Check(el))
1056 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001057#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001058 if (!PyString_Check(el)) {
1059 PyErr_SetString(PyExc_TypeError,
1060 "'in <string>' requires string as left operand");
1061 return -1;
1062 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001063 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001064
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001065 if (len_sub == 0)
1066 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001067
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001068 pos = fastsearch(
1069 s, PyString_GET_SIZE(a),
1070 sub, len_sub, FAST_SEARCH
1071 );
Fredrik Lundh3a65d872006-05-26 17:31:41 +00001072
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001073 return (pos != -1);
Guido van Rossum9284a572000-03-07 15:53:43 +00001074}
1075
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001076static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001077string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001079 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001080 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001081 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001082 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001083 return NULL;
1084 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001085 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001086 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001087 if (v == NULL)
1088 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001089 else {
1090#ifdef COUNT_ALLOCS
1091 one_strings++;
1092#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001093 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001094 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001095 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001096}
1097
Martin v. Löwiscd353062001-05-24 16:56:35 +00001098static PyObject*
1099string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001100{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001101 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001102 Py_ssize_t len_a, len_b;
1103 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001104 PyObject *result;
1105
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001106 /* Make sure both arguments are strings. */
1107 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001108 result = Py_NotImplemented;
1109 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001110 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001111 if (a == b) {
1112 switch (op) {
1113 case Py_EQ:case Py_LE:case Py_GE:
1114 result = Py_True;
1115 goto out;
1116 case Py_NE:case Py_LT:case Py_GT:
1117 result = Py_False;
1118 goto out;
1119 }
1120 }
1121 if (op == Py_EQ) {
1122 /* Supporting Py_NE here as well does not save
1123 much time, since Py_NE is rarely used. */
1124 if (a->ob_size == b->ob_size
1125 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001126 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001127 a->ob_size) == 0)) {
1128 result = Py_True;
1129 } else {
1130 result = Py_False;
1131 }
1132 goto out;
1133 }
1134 len_a = a->ob_size; len_b = b->ob_size;
1135 min_len = (len_a < len_b) ? len_a : len_b;
1136 if (min_len > 0) {
1137 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1138 if (c==0)
1139 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1140 }else
1141 c = 0;
1142 if (c == 0)
1143 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1144 switch (op) {
1145 case Py_LT: c = c < 0; break;
1146 case Py_LE: c = c <= 0; break;
1147 case Py_EQ: assert(0); break; /* unreachable */
1148 case Py_NE: c = c != 0; break;
1149 case Py_GT: c = c > 0; break;
1150 case Py_GE: c = c >= 0; break;
1151 default:
1152 result = Py_NotImplemented;
1153 goto out;
1154 }
1155 result = c ? Py_True : Py_False;
1156 out:
1157 Py_INCREF(result);
1158 return result;
1159}
1160
1161int
1162_PyString_Eq(PyObject *o1, PyObject *o2)
1163{
1164 PyStringObject *a, *b;
1165 a = (PyStringObject*)o1;
1166 b = (PyStringObject*)o2;
1167 return a->ob_size == b->ob_size
1168 && *a->ob_sval == *b->ob_sval
1169 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001170}
1171
Guido van Rossum9bfef441993-03-29 10:43:31 +00001172static long
Fred Drakeba096332000-07-09 07:04:36 +00001173string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001174{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001175 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001176 register unsigned char *p;
1177 register long x;
1178
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001179 if (a->ob_shash != -1)
1180 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001181 len = a->ob_size;
1182 p = (unsigned char *) a->ob_sval;
1183 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001184 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001185 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001186 x ^= a->ob_size;
1187 if (x == -1)
1188 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001189 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001190 return x;
1191}
1192
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001193#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1194
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001195static PyObject*
1196string_subscript(PyStringObject* self, PyObject* item)
1197{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001198 PyNumberMethods *nb = item->ob_type->tp_as_number;
1199 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1200 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001201 if (i == -1 && PyErr_Occurred())
1202 return NULL;
1203 if (i < 0)
1204 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001205 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206 }
1207 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001208 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 char* source_buf;
1210 char* result_buf;
1211 PyObject* result;
1212
Tim Petersae1d0c92006-03-17 03:29:34 +00001213 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001214 PyString_GET_SIZE(self),
1215 &start, &stop, &step, &slicelength) < 0) {
1216 return NULL;
1217 }
1218
1219 if (slicelength <= 0) {
1220 return PyString_FromStringAndSize("", 0);
1221 }
1222 else {
1223 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001224 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001225 if (result_buf == NULL)
1226 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001227
Tim Petersae1d0c92006-03-17 03:29:34 +00001228 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001229 cur += step, i++) {
1230 result_buf[i] = source_buf[cur];
1231 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001232
1233 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001234 slicelength);
1235 PyMem_Free(result_buf);
1236 return result;
1237 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001238 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001239 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001240 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001241 "string indices must be integers");
1242 return NULL;
1243 }
1244}
1245
Martin v. Löwis18e16552006-02-15 17:27:45 +00001246static Py_ssize_t
1247string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001248{
1249 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001250 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001251 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001252 return -1;
1253 }
1254 *ptr = (void *)self->ob_sval;
1255 return self->ob_size;
1256}
1257
Martin v. Löwis18e16552006-02-15 17:27:45 +00001258static Py_ssize_t
1259string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001260{
Guido van Rossum045e6881997-09-08 18:30:11 +00001261 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001262 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263 return -1;
1264}
1265
Martin v. Löwis18e16552006-02-15 17:27:45 +00001266static Py_ssize_t
1267string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001268{
1269 if ( lenp )
1270 *lenp = self->ob_size;
1271 return 1;
1272}
1273
Martin v. Löwis18e16552006-02-15 17:27:45 +00001274static Py_ssize_t
1275string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001276{
1277 if ( index != 0 ) {
1278 PyErr_SetString(PyExc_SystemError,
1279 "accessing non-existent string segment");
1280 return -1;
1281 }
1282 *ptr = self->ob_sval;
1283 return self->ob_size;
1284}
1285
/* Sequence slot table for str.  Both assignment slots are 0, so no
   item or slice assignment handler is installed. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,      /*sq_ass_item*/
    0,      /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1296
/* Mapping slot table for str: length, subscript (integer index or slice
   object, see string_subscript), and no subscript-assignment handler. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};
1302
/* Buffer slot table for str.  The write-buffer entry is the handler that
   always fails with TypeError (see string_buffer_getwritebuf). */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
};
1309
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310
1311
/* Selectors for the three strip variants; they double as indexes into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001320
Andrew Dalke525eab32006-05-26 14:00:45 +00001321
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits, capped at
   MAX_PREALLOC.  5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to `list` as a new string.
   Uses the caller's `str` and `list` variables and jumps to the caller's
   `onError` label on failure.  Expands to several statements that are NOT
   wrapped in braces, so it must not be used as the unbraced body of an
   if/else/loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as item number `count` of `list`, then increment
   `count`.  The first MAX_PREALLOC items are written directly into the
   slots of the preallocated list; later ones are appended.
   Uses the caller's `str`, `list` and `count` variables and jumps to the
   caller's `onError` label on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count;

/* Scanning helpers.  `i` must be a modifiable index variable; the forward
   forms stop at `len`, the reverse forms stop once `i` drops below 0. */
#define SKIP_SPACE(s, i, len)    { while (i<(len) &&  isspace(Py_CHARMASK((s)[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<(len) && !isspace(Py_CHARMASK((s)[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 &&  isspace(Py_CHARMASK((s)[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK((s)[i]))) i--; }
1371
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001372Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001373split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374{
Andrew Dalke525eab32006-05-26 14:00:45 +00001375 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001376 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001377 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378
1379 if (list == NULL)
1380 return NULL;
1381
Andrew Dalke02758d62006-05-26 15:21:01 +00001382 i = j = 0;
1383
1384 while (maxsplit-- > 0) {
1385 SKIP_SPACE(s, i, len);
1386 if (i==len) break;
1387 j = i; i++;
1388 SKIP_NONSPACE(s, i, len);
1389 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001390 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001391
1392 if (i < len) {
1393 /* Only occurs when maxsplit was reached */
1394 /* Skip any remaining whitespace and copy to end of string */
1395 SKIP_SPACE(s, i, len);
1396 if (i != len)
1397 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001399 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001401 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 Py_DECREF(list);
1403 return NULL;
1404}
1405
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001406Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001407split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001408{
Andrew Dalke525eab32006-05-26 14:00:45 +00001409 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001410 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001411 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001412
1413 if (list == NULL)
1414 return NULL;
1415
1416 for (i = j = 0; i < len; ) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001417 /* TODO: Use findchar/memchr for this? */
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001418 if (s[i] == ch) {
1419 if (maxcount-- <= 0)
1420 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001421 SPLIT_ADD(s, j, i);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001422 i = j = i + 1;
1423 } else
1424 i++;
1425 }
1426 if (j <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001427 SPLIT_ADD(s, j, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001428 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001429 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001430 return list;
1431
1432 onError:
1433 Py_DECREF(list);
1434 return NULL;
1435}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436
/* Docstring for the split method implemented by string_split() below. */
PyDoc_STRVAR(split__doc__,
"S.split([sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string. If maxsplit is given, at most maxsplit\n\
splits are done. If sep is not specified or is None, any\n\
whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444
1445static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001446string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001448 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001449 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001450 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001451 PyObject *list, *str, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452
Martin v. Löwis9c830762006-04-13 08:37:17 +00001453 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001455 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001456 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001457 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001459 if (PyString_Check(subobj)) {
1460 sub = PyString_AS_STRING(subobj);
1461 n = PyString_GET_SIZE(subobj);
1462 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001463#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001464 else if (PyUnicode_Check(subobj))
1465 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001466#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001467 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1468 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001469
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470 if (n == 0) {
1471 PyErr_SetString(PyExc_ValueError, "empty separator");
1472 return NULL;
1473 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001474 else if (n == 1)
1475 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001476
Andrew Dalke525eab32006-05-26 14:00:45 +00001477 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478 if (list == NULL)
1479 return NULL;
1480
1481 i = j = 0;
1482 while (i+n <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001483 /* TODO: Use Py_STRING_MATCH */
Fred Drake396f6e02000-06-20 15:47:54 +00001484 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001485 if (maxsplit-- <= 0)
1486 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001487 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489 }
1490 else
1491 i++;
1492 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001493 SPLIT_ADD(s, j, len);
1494 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495 return list;
1496
Andrew Dalke525eab32006-05-26 14:00:45 +00001497 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498 Py_DECREF(list);
1499 return NULL;
1500}
1501
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001502PyDoc_STRVAR(partition__doc__,
1503"S.partition(sep) -> (head, sep, tail)\n\
1504\n\
1505Searches for the separator sep in S, and returns the part before it,\n\
1506the separator itself, and the part after it. If the separator is not\n\
1507found, returns S and two empty strings.");
1508
1509static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001510string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001511{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001512 const char *sep;
1513 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001514
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001515 if (PyString_Check(sep_obj)) {
1516 sep = PyString_AS_STRING(sep_obj);
1517 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001518 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001519#ifdef Py_USING_UNICODE
1520 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001521 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001522#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001523 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001524 return NULL;
1525
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001526 return partition(
1527 (PyObject*) self,
1528 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1529 sep_obj, sep, sep_len
1530 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001531}
1532
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001533Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001534rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001535{
Andrew Dalke525eab32006-05-26 14:00:45 +00001536 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001537 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001538 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001539
1540 if (list == NULL)
1541 return NULL;
1542
Andrew Dalke02758d62006-05-26 15:21:01 +00001543 i = j = len-1;
1544
1545 while (maxsplit-- > 0) {
1546 RSKIP_SPACE(s, i);
1547 if (i<0) break;
1548 j = i; i--;
1549 RSKIP_NONSPACE(s, i);
1550 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001551 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001552 if (i >= 0) {
1553 /* Only occurs when maxsplit was reached */
1554 /* Skip any remaining whitespace and copy to beginning of string */
1555 RSKIP_SPACE(s, i);
1556 if (i >= 0)
1557 SPLIT_ADD(s, 0, i + 1);
1558
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001559 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001560 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001561 if (PyList_Reverse(list) < 0)
1562 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001563 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001564 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001565 Py_DECREF(list);
1566 return NULL;
1567}
1568
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001569Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001570rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001571{
Andrew Dalke525eab32006-05-26 14:00:45 +00001572 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001573 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001574 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001575
1576 if (list == NULL)
1577 return NULL;
1578
1579 for (i = j = len - 1; i >= 0; ) {
1580 if (s[i] == ch) {
1581 if (maxcount-- <= 0)
1582 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001583 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001584 j = i = i - 1;
1585 } else
1586 i--;
1587 }
1588 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001589 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001590 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001591 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001592 if (PyList_Reverse(list) < 0)
1593 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001594 return list;
1595
1596 onError:
1597 Py_DECREF(list);
1598 return NULL;
1599}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001600
1601PyDoc_STRVAR(rsplit__doc__,
1602"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1603\n\
1604Return a list of the words in the string S, using sep as the\n\
1605delimiter string, starting at the end of the string and working\n\
1606to the front. If maxsplit is given, at most maxsplit splits are\n\
1607done. If sep is not specified or is None, any whitespace string\n\
1608is a separator.");
1609
1610static PyObject *
1611string_rsplit(PyStringObject *self, PyObject *args)
1612{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001613 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001614 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001615 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001616 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001617
Martin v. Löwis9c830762006-04-13 08:37:17 +00001618 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001619 return NULL;
1620 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001621 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001622 if (subobj == Py_None)
1623 return rsplit_whitespace(s, len, maxsplit);
1624 if (PyString_Check(subobj)) {
1625 sub = PyString_AS_STRING(subobj);
1626 n = PyString_GET_SIZE(subobj);
1627 }
1628#ifdef Py_USING_UNICODE
1629 else if (PyUnicode_Check(subobj))
1630 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1631#endif
1632 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1633 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001634
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001635 if (n == 0) {
1636 PyErr_SetString(PyExc_ValueError, "empty separator");
1637 return NULL;
1638 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001639 else if (n == 1)
1640 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001641
Andrew Dalke525eab32006-05-26 14:00:45 +00001642 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001643 if (list == NULL)
1644 return NULL;
1645
1646 j = len;
1647 i = j - n;
1648 while (i >= 0) {
1649 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1650 if (maxsplit-- <= 0)
1651 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001652 SPLIT_ADD(s, i+n, j);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001653 j = i;
1654 i -= n;
1655 }
1656 else
1657 i--;
1658 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001659 SPLIT_ADD(s, 0, j);
1660 FIX_PREALLOC_SIZE(list);
1661 if (PyList_Reverse(list) < 0)
1662 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001663 return list;
1664
Andrew Dalke525eab32006-05-26 14:00:45 +00001665onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001666 Py_DECREF(list);
1667 return NULL;
1668}
1669
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001671PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672"S.join(sequence) -> string\n\
1673\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001675sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001676
1677static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001678string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001679{
1680 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001681 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001682 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001683 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001684 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001685 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001686 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001687 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688
Tim Peters19fe14e2001-01-19 03:03:47 +00001689 seq = PySequence_Fast(orig, "");
1690 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001691 return NULL;
1692 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001693
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001694 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001695 if (seqlen == 0) {
1696 Py_DECREF(seq);
1697 return PyString_FromString("");
1698 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001699 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001700 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001701 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1702 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001703 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001704 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001705 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001707
Raymond Hettinger674f2412004-08-23 23:23:54 +00001708 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001709 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001710 * Do a pre-pass to figure out the total amount of space we'll
1711 * need (sz), see whether any argument is absurd, and defer to
1712 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001713 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001714 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001715 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001716 item = PySequence_Fast_GET_ITEM(seq, i);
1717 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001718#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001719 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001720 /* Defer to Unicode join.
1721 * CAUTION: There's no gurantee that the
1722 * original sequence can be iterated over
1723 * again, so we must pass seq here.
1724 */
1725 PyObject *result;
1726 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001727 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001728 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001729 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001730#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001731 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001732 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001733 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001734 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001735 Py_DECREF(seq);
1736 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001737 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001738 sz += PyString_GET_SIZE(item);
1739 if (i != 0)
1740 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001741 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001742 PyErr_SetString(PyExc_OverflowError,
1743 "join() is too long for a Python string");
1744 Py_DECREF(seq);
1745 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001747 }
1748
1749 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001750 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001751 if (res == NULL) {
1752 Py_DECREF(seq);
1753 return NULL;
1754 }
1755
1756 /* Catenate everything. */
1757 p = PyString_AS_STRING(res);
1758 for (i = 0; i < seqlen; ++i) {
1759 size_t n;
1760 item = PySequence_Fast_GET_ITEM(seq, i);
1761 n = PyString_GET_SIZE(item);
1762 memcpy(p, PyString_AS_STRING(item), n);
1763 p += n;
1764 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001765 memcpy(p, sep, seplen);
1766 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001767 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001769
Jeremy Hylton49048292000-07-11 03:28:17 +00001770 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001771 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772}
1773
Tim Peters52e155e2001-06-16 05:42:57 +00001774PyObject *
1775_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001776{
Tim Petersa7259592001-06-16 05:11:17 +00001777 assert(sep != NULL && PyString_Check(sep));
1778 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001779 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001780}
1781
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001782Py_LOCAL(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001783string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001784{
1785 if (*end > len)
1786 *end = len;
1787 else if (*end < 0)
1788 *end += len;
1789 if (*end < 0)
1790 *end = 0;
1791 if (*start < 0)
1792 *start += len;
1793 if (*start < 0)
1794 *start = 0;
1795}
1796
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001797Py_LOCAL(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001798string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001800 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001801 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001802 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001803 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804
Martin v. Löwis18e16552006-02-15 17:27:45 +00001805 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001806 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001807 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001808 return -2;
1809 if (PyString_Check(subobj)) {
1810 sub = PyString_AS_STRING(subobj);
1811 n = PyString_GET_SIZE(subobj);
1812 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001813#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001814 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001815 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001816#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001817 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818 return -2;
1819
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001820 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001822 if (n == 0)
1823 return (dir > 0) ? i : last;
1824 if (dir > 0) {
1825 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1826 FAST_SEARCH);
1827 if (pos < 0)
1828 return pos;
1829 return pos + i;
Fredrik Lundh3a65d872006-05-26 17:31:41 +00001830 } else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001831 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001832
Guido van Rossum4c08d552000-03-10 22:55:18 +00001833 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001834 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001835 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001836 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001837 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001838 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001839
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840 return -1;
1841}
1842
1843
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001844PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001845"S.find(sub [,start [,end]]) -> int\n\
1846\n\
1847Return the lowest index in S where substring sub is found,\n\
1848such that sub is contained within s[start,end]. Optional\n\
1849arguments start and end are interpreted as in slice notation.\n\
1850\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001851Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852
1853static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001854string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001855{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001856 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857 if (result == -2)
1858 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001859 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001860}
1861
1862
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001863PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864"S.index(sub [,start [,end]]) -> int\n\
1865\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001866Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867
1868static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001869string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001870{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001871 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001872 if (result == -2)
1873 return NULL;
1874 if (result == -1) {
1875 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001876 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877 return NULL;
1878 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001879 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880}
1881
1882
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001883PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001884"S.rfind(sub [,start [,end]]) -> int\n\
1885\n\
1886Return the highest index in S where substring sub is found,\n\
1887such that sub is contained within s[start,end]. Optional\n\
1888arguments start and end are interpreted as in slice notation.\n\
1889\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001890Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891
1892static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001893string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001895 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896 if (result == -2)
1897 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001898 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001899}
1900
1901
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001902PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903"S.rindex(sub [,start [,end]]) -> int\n\
1904\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001905Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906
1907static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001908string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001910 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911 if (result == -2)
1912 return NULL;
1913 if (result == -1) {
1914 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001915 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916 return NULL;
1917 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001918 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919}
1920
1921
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001922Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001923do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1924{
1925 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001926 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001927 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001928 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1929 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001930
1931 i = 0;
1932 if (striptype != RIGHTSTRIP) {
1933 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1934 i++;
1935 }
1936 }
1937
1938 j = len;
1939 if (striptype != LEFTSTRIP) {
1940 do {
1941 j--;
1942 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1943 j++;
1944 }
1945
1946 if (i == 0 && j == len && PyString_CheckExact(self)) {
1947 Py_INCREF(self);
1948 return (PyObject*)self;
1949 }
1950 else
1951 return PyString_FromStringAndSize(s+i, j-i);
1952}
1953
1954
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001955Py_LOCAL(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001956do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957{
1958 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001959 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961 i = 0;
1962 if (striptype != RIGHTSTRIP) {
1963 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1964 i++;
1965 }
1966 }
1967
1968 j = len;
1969 if (striptype != LEFTSTRIP) {
1970 do {
1971 j--;
1972 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1973 j++;
1974 }
1975
Tim Peters8fa5dd02001-09-12 02:18:30 +00001976 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977 Py_INCREF(self);
1978 return (PyObject*)self;
1979 }
1980 else
1981 return PyString_FromStringAndSize(s+i, j-i);
1982}
1983
1984
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001985Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001986do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1987{
1988 PyObject *sep = NULL;
1989
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001990 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001991 return NULL;
1992
1993 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001994 if (PyString_Check(sep))
1995 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001996#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001997 else if (PyUnicode_Check(sep)) {
1998 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1999 PyObject *res;
2000 if (uniself==NULL)
2001 return NULL;
2002 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2003 striptype, sep);
2004 Py_DECREF(uniself);
2005 return res;
2006 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002007#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002008 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002009#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002010 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002011#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002012 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002013#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002014 STRIPNAME(striptype));
2015 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002016 }
2017
2018 return do_strip(self, striptype);
2019}
2020
2021
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002022PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002023"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024\n\
2025Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002026whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002027If chars is given and not None, remove characters in chars instead.\n\
2028If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029
2030static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002031string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002033 if (PyTuple_GET_SIZE(args) == 0)
2034 return do_strip(self, BOTHSTRIP); /* Common case */
2035 else
2036 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037}
2038
2039
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002040PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002041"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002043Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002044If chars is given and not None, remove characters in chars instead.\n\
2045If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046
2047static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002048string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002049{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002050 if (PyTuple_GET_SIZE(args) == 0)
2051 return do_strip(self, LEFTSTRIP); /* Common case */
2052 else
2053 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054}
2055
2056
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002057PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002058"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002059\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002060Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002061If chars is given and not None, remove characters in chars instead.\n\
2062If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063
2064static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002065string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002067 if (PyTuple_GET_SIZE(args) == 0)
2068 return do_strip(self, RIGHTSTRIP); /* Common case */
2069 else
2070 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071}
2072
2073
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002074PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075"S.lower() -> string\n\
2076\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002077Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002079/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2080#ifndef _tolower
2081#define _tolower tolower
2082#endif
2083
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002085string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002087 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002088 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002089 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002091 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002092 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002094
2095 s = PyString_AS_STRING(newobj);
2096
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002097 memcpy(s, PyString_AS_STRING(self), n);
2098
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002100 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002101 if (isupper(c))
2102 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002104
Anthony Baxtera6286212006-04-11 07:42:36 +00002105 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106}
2107
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002108PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109"S.upper() -> string\n\
2110\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002111Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002113#ifndef _toupper
2114#define _toupper toupper
2115#endif
2116
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002118string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002120 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002121 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002122 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002124 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002125 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002127
2128 s = PyString_AS_STRING(newobj);
2129
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002130 memcpy(s, PyString_AS_STRING(self), n);
2131
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002133 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002134 if (islower(c))
2135 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002137
Anthony Baxtera6286212006-04-11 07:42:36 +00002138 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139}
2140
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002141PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002142"S.title() -> string\n\
2143\n\
2144Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002145characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002146
2147static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002148string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002149{
2150 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002151 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002152 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002153 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002154
Anthony Baxtera6286212006-04-11 07:42:36 +00002155 newobj = PyString_FromStringAndSize(NULL, n);
2156 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002157 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002158 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002159 for (i = 0; i < n; i++) {
2160 int c = Py_CHARMASK(*s++);
2161 if (islower(c)) {
2162 if (!previous_is_cased)
2163 c = toupper(c);
2164 previous_is_cased = 1;
2165 } else if (isupper(c)) {
2166 if (previous_is_cased)
2167 c = tolower(c);
2168 previous_is_cased = 1;
2169 } else
2170 previous_is_cased = 0;
2171 *s_new++ = c;
2172 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002173 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174}
2175
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002176PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177"S.capitalize() -> string\n\
2178\n\
2179Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002180capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181
2182static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002183string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184{
2185 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002186 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002187 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188
Anthony Baxtera6286212006-04-11 07:42:36 +00002189 newobj = PyString_FromStringAndSize(NULL, n);
2190 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002192 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193 if (0 < n) {
2194 int c = Py_CHARMASK(*s++);
2195 if (islower(c))
2196 *s_new = toupper(c);
2197 else
2198 *s_new = c;
2199 s_new++;
2200 }
2201 for (i = 1; i < n; i++) {
2202 int c = Py_CHARMASK(*s++);
2203 if (isupper(c))
2204 *s_new = tolower(c);
2205 else
2206 *s_new = c;
2207 s_new++;
2208 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002209 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210}
2211
2212
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002213PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214"S.count(sub[, start[, end]]) -> int\n\
2215\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002216Return the number of non-overlapping occurrences of substring sub in\n\
2217string S[start:end]. Optional arguments start and end are interpreted\n\
2218as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219
2220static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002221string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002223 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002224 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002225 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002226 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002227 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228
Guido van Rossumc6821402000-05-08 14:08:05 +00002229 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2230 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002232
Guido van Rossum4c08d552000-03-10 22:55:18 +00002233 if (PyString_Check(subobj)) {
2234 sub = PyString_AS_STRING(subobj);
2235 n = PyString_GET_SIZE(subobj);
2236 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002237#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002238 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002239 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002240 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2241 if (count == -1)
2242 return NULL;
2243 else
2244 return PyInt_FromLong((long) count);
2245 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002246#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2248 return NULL;
2249
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002250 string_adjust_indices(&i, &last, len);
2251
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252 m = last + 1 - n;
2253 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002254 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255
Fredrik Lundhaf722372006-05-25 17:55:31 +00002256 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2257 if (r < 0)
2258 r = 0; /* no match */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002259 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260}
2261
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002262PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263"S.swapcase() -> string\n\
2264\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002266converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267
2268static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002269string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270{
2271 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002272 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002273 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274
Anthony Baxtera6286212006-04-11 07:42:36 +00002275 newobj = PyString_FromStringAndSize(NULL, n);
2276 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002278 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002279 for (i = 0; i < n; i++) {
2280 int c = Py_CHARMASK(*s++);
2281 if (islower(c)) {
2282 *s_new = toupper(c);
2283 }
2284 else if (isupper(c)) {
2285 *s_new = tolower(c);
2286 }
2287 else
2288 *s_new = c;
2289 s_new++;
2290 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002291 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002292}
2293
2294
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002295PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296"S.translate(table [,deletechars]) -> string\n\
2297\n\
2298Return a copy of the string S, where all characters occurring\n\
2299in the optional argument deletechars are removed, and the\n\
2300remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002301translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302
2303static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002304string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306 register char *input, *output;
2307 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002308 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002310 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002311 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312 PyObject *result;
2313 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002314 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002315
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002316 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002319
2320 if (PyString_Check(tableobj)) {
2321 table1 = PyString_AS_STRING(tableobj);
2322 tablen = PyString_GET_SIZE(tableobj);
2323 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002324#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002325 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002326 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327 parameter; instead a mapping to None will cause characters
2328 to be deleted. */
2329 if (delobj != NULL) {
2330 PyErr_SetString(PyExc_TypeError,
2331 "deletions are implemented differently for unicode");
2332 return NULL;
2333 }
2334 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2335 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002336#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339
Martin v. Löwis00b61272002-12-12 20:03:19 +00002340 if (tablen != 256) {
2341 PyErr_SetString(PyExc_ValueError,
2342 "translation table must be 256 characters long");
2343 return NULL;
2344 }
2345
Guido van Rossum4c08d552000-03-10 22:55:18 +00002346 if (delobj != NULL) {
2347 if (PyString_Check(delobj)) {
2348 del_table = PyString_AS_STRING(delobj);
2349 dellen = PyString_GET_SIZE(delobj);
2350 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002351#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 else if (PyUnicode_Check(delobj)) {
2353 PyErr_SetString(PyExc_TypeError,
2354 "deletions are implemented differently for unicode");
2355 return NULL;
2356 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002357#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002358 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2359 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002360 }
2361 else {
2362 del_table = NULL;
2363 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364 }
2365
2366 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002367 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368 result = PyString_FromStringAndSize((char *)NULL, inlen);
2369 if (result == NULL)
2370 return NULL;
2371 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002372 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373
2374 if (dellen == 0) {
2375 /* If no deletions are required, use faster code */
2376 for (i = inlen; --i >= 0; ) {
2377 c = Py_CHARMASK(*input++);
2378 if (Py_CHARMASK((*output++ = table[c])) != c)
2379 changed = 1;
2380 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002381 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382 return result;
2383 Py_DECREF(result);
2384 Py_INCREF(input_obj);
2385 return input_obj;
2386 }
2387
2388 for (i = 0; i < 256; i++)
2389 trans_table[i] = Py_CHARMASK(table[i]);
2390
2391 for (i = 0; i < dellen; i++)
2392 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2393
2394 for (i = inlen; --i >= 0; ) {
2395 c = Py_CHARMASK(*input++);
2396 if (trans_table[c] != -1)
2397 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2398 continue;
2399 changed = 1;
2400 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002401 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002402 Py_DECREF(result);
2403 Py_INCREF(input_obj);
2404 return input_obj;
2405 }
2406 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002407 if (inlen > 0)
2408 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409 return result;
2410}
2411
2412
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Py_STRING_MATCH(target, offset, pattern, length) is true when the `length'
   bytes of `target' starting at `offset' are equal to `pattern'.  The first
   and last bytes are compared directly; memcmp() then checks the bytes in
   between.

   Don't call if length < 2: the interior length passed to memcmp() is
   length-2.  Every argument is expanded more than once, so pass only
   expressions without side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)              \
    ((target)[(offset)] == (pattern)[0] &&                            \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&        \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* findchar(target, target_len, c): pointer to the first byte equal to `c'
   among the first `target_len' bytes of `target', or NULL if there is
   none (thin wrapper around memchr()). */
#define findchar(target, target_len, c)                               \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2426
2427/* String ops must return a string. */
2428/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002429Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002430return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002431{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002432 if (PyString_CheckExact(self)) {
2433 Py_INCREF(self);
2434 return self;
2435 }
2436 return (PyStringObject *)PyString_FromStringAndSize(
2437 PyString_AS_STRING(self),
2438 PyString_GET_SIZE(self));
2439}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002440
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002441Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002442countchar(char *target, int target_len, char c)
2443{
2444 Py_ssize_t count=0;
2445 char *start=target;
2446 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002447
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002448 while ( (start=findchar(start, end-start, c)) != NULL ) {
2449 count++;
2450 start += 1;
2451 }
2452
2453 return count;
2454}
2455
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002456Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002457findstring(char *target, Py_ssize_t target_len,
2458 char *pattern, Py_ssize_t pattern_len,
2459 Py_ssize_t start,
2460 Py_ssize_t end,
2461 int direction)
2462{
2463 if (start < 0) {
2464 start += target_len;
2465 if (start < 0)
2466 start = 0;
2467 }
2468 if (end > target_len) {
2469 end = target_len;
2470 } else if (end < 0) {
2471 end += target_len;
2472 if (end < 0)
2473 end = 0;
2474 }
2475
2476 /* zero-length substrings always match at the first attempt */
2477 if (pattern_len == 0)
2478 return (direction > 0) ? start : end;
2479
2480 end -= pattern_len;
2481
2482 if (direction < 0) {
2483 for (; end >= start; end--)
2484 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2485 return end;
2486 } else {
2487 for (; start <= end; start++)
2488 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2489 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002490 }
2491 return -1;
2492}
2493
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002494Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002495countstring(char *target, Py_ssize_t target_len,
2496 char *pattern, Py_ssize_t pattern_len,
2497 Py_ssize_t start,
2498 Py_ssize_t end,
2499 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002500{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002501 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002502
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002503 if (start < 0) {
2504 start += target_len;
2505 if (start < 0)
2506 start = 0;
2507 }
2508 if (end > target_len) {
2509 end = target_len;
2510 } else if (end < 0) {
2511 end += target_len;
2512 if (end < 0)
2513 end = 0;
2514 }
2515
2516 /* zero-length substrings match everywhere */
2517 if (pattern_len == 0)
2518 return target_len+1;
2519
2520 end -= pattern_len;
2521
2522 if (direction < 0) {
2523 for (; end >= start; end--)
2524 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2525 count++;
2526 end -= pattern_len-1;
2527 }
2528 } else {
2529 for (; start <= end; start++)
2530 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2531 count++;
2532 start += pattern_len-1;
2533 }
2534 }
2535 return count;
2536}
2537
2538
/* Algorithms for different cases of string replacement */
2540
2541/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002542Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002543replace_interleave(PyStringObject *self,
2544 PyStringObject *to,
2545 Py_ssize_t maxcount)
2546{
2547 char *self_s, *to_s, *result_s;
2548 Py_ssize_t self_len, to_len, result_len;
2549 Py_ssize_t count, i, product;
2550 PyStringObject *result;
2551
2552 self_len = PyString_GET_SIZE(self);
2553 to_len = PyString_GET_SIZE(to);
2554
2555 /* 1 at the end plus 1 after every character */
2556 count = self_len+1;
2557 if (maxcount < count)
2558 count = maxcount;
2559
2560 /* Check for overflow */
2561 /* result_len = count * to_len + self_len; */
2562 product = count * to_len;
2563 if (product / to_len != count) {
2564 PyErr_SetString(PyExc_OverflowError,
2565 "replace string is too long");
2566 return NULL;
2567 }
2568 result_len = product + self_len;
2569 if (result_len < 0) {
2570 PyErr_SetString(PyExc_OverflowError,
2571 "replace string is too long");
2572 return NULL;
2573 }
2574
2575 if (! (result = (PyStringObject *)
2576 PyString_FromStringAndSize(NULL, result_len)) )
2577 return NULL;
2578
2579 self_s = PyString_AS_STRING(self);
2580 to_s = PyString_AS_STRING(to);
2581 to_len = PyString_GET_SIZE(to);
2582 result_s = PyString_AS_STRING(result);
2583
2584 /* TODO: special case single character, which doesn't need memcpy */
2585
2586 /* Lay the first one down (guaranteed this will occur) */
2587 memcpy(result_s, to_s, to_len);
2588 result_s += to_len;
2589 count -= 1;
2590
2591 for (i=0; i<count; i++) {
2592 *result_s++ = *self_s++;
2593 memcpy(result_s, to_s, to_len);
2594 result_s += to_len;
2595 }
2596
2597 /* Copy the rest of the original string */
2598 memcpy(result_s, self_s, self_len-i);
2599
2600 return result;
2601}
2602
2603/* Special case for deleting a single character */
2604/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002605Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002606replace_delete_single_character(PyStringObject *self,
2607 char from_c, Py_ssize_t maxcount)
2608{
2609 char *self_s, *result_s;
2610 char *start, *next, *end;
2611 Py_ssize_t self_len, result_len;
2612 Py_ssize_t count;
2613 PyStringObject *result;
2614
2615 self_len = PyString_GET_SIZE(self);
2616 self_s = PyString_AS_STRING(self);
2617
2618 count = countchar(self_s, self_len, from_c);
2619 if (count == 0) {
2620 return return_self(self);
2621 }
2622 if (count > maxcount)
2623 count = maxcount;
2624
2625 result_len = self_len - count; /* from_len == 1 */
2626 assert(result_len>=0);
2627
2628 if ( (result = (PyStringObject *)
2629 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2630 return NULL;
2631 result_s = PyString_AS_STRING(result);
2632
2633 start = self_s;
2634 end = self_s + self_len;
2635 while (count-- > 0) {
2636 next = findchar(start, end-start, from_c);
2637 if (next == NULL)
2638 break;
2639 memcpy(result_s, start, next-start);
2640 result_s += (next-start);
2641 start = next+1;
2642 }
2643 memcpy(result_s, start, end-start);
2644
2645 return result;
2646}
2647
2648/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2649
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002650Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002651replace_delete_substring(PyStringObject *self, PyStringObject *from,
2652 Py_ssize_t maxcount) {
2653 char *self_s, *from_s, *result_s;
2654 char *start, *next, *end;
2655 Py_ssize_t self_len, from_len, result_len;
2656 Py_ssize_t count, offset;
2657 PyStringObject *result;
2658
2659 self_len = PyString_GET_SIZE(self);
2660 self_s = PyString_AS_STRING(self);
2661 from_len = PyString_GET_SIZE(from);
2662 from_s = PyString_AS_STRING(from);
2663
2664 count = countstring(self_s, self_len,
2665 from_s, from_len,
2666 0, self_len, 1);
2667
2668 if (count > maxcount)
2669 count = maxcount;
2670
2671 if (count == 0) {
2672 /* no matches */
2673 return return_self(self);
2674 }
2675
2676 result_len = self_len - (count * from_len);
2677 assert (result_len>=0);
2678
2679 if ( (result = (PyStringObject *)
2680 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2681 return NULL;
2682
2683 result_s = PyString_AS_STRING(result);
2684
2685 start = self_s;
2686 end = self_s + self_len;
2687 while (count-- > 0) {
2688 offset = findstring(start, end-start,
2689 from_s, from_len,
2690 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002691 if (offset == -1)
2692 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002693 next = start + offset;
2694
2695 memcpy(result_s, start, next-start);
2696
2697 result_s += (next-start);
2698 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002699 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002700 memcpy(result_s, start, end-start);
2701 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002702}
2703
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002704/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002705Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002706replace_single_character_in_place(PyStringObject *self,
2707 char from_c, char to_c,
2708 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002709{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002710 char *self_s, *result_s, *start, *end, *next;
2711 Py_ssize_t self_len;
2712 PyStringObject *result;
2713
2714 /* The result string will be the same size */
2715 self_s = PyString_AS_STRING(self);
2716 self_len = PyString_GET_SIZE(self);
2717
2718 next = findchar(self_s, self_len, from_c);
2719
2720 if (next == NULL) {
2721 /* No matches; return the original string */
2722 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002723 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002724
2725 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002726 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002727 if (result == NULL)
2728 return NULL;
2729 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002730 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002731
2732 /* change everything in-place, starting with this one */
2733 start = result_s + (next-self_s);
2734 *start = to_c;
2735 start++;
2736 end = result_s + self_len;
2737
2738 while (--maxcount > 0) {
2739 next = findchar(start, end-start, from_c);
2740 if (next == NULL)
2741 break;
2742 *next = to_c;
2743 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002744 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002745
2746 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002747}
2748
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002749/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002750Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002751replace_substring_in_place(PyStringObject *self,
2752 PyStringObject *from,
2753 PyStringObject *to,
2754 Py_ssize_t maxcount)
2755{
2756 char *result_s, *start, *end;
2757 char *self_s, *from_s, *to_s;
2758 Py_ssize_t self_len, from_len, offset;
2759 PyStringObject *result;
2760
2761 /* The result string will be the same size */
2762
2763 self_s = PyString_AS_STRING(self);
2764 self_len = PyString_GET_SIZE(self);
2765
2766 from_s = PyString_AS_STRING(from);
2767 from_len = PyString_GET_SIZE(from);
2768 to_s = PyString_AS_STRING(to);
2769
2770 offset = findstring(self_s, self_len,
2771 from_s, from_len,
2772 0, self_len, FORWARD);
2773
2774 if (offset == -1) {
2775 /* No matches; return the original string */
2776 return return_self(self);
2777 }
2778
2779 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002780 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002781 if (result == NULL)
2782 return NULL;
2783 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002784 memcpy(result_s, self_s, self_len);
2785
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002786
2787 /* change everything in-place, starting with this one */
2788 start = result_s + offset;
2789 memcpy(start, to_s, from_len);
2790 start += from_len;
2791 end = result_s + self_len;
2792
2793 while ( --maxcount > 0) {
2794 offset = findstring(start, end-start,
2795 from_s, from_len,
2796 0, end-start, FORWARD);
2797 if (offset==-1)
2798 break;
2799 memcpy(start+offset, to_s, from_len);
2800 start += offset+from_len;
2801 }
2802
2803 return result;
2804}
2805
2806/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002807Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808replace_single_character(PyStringObject *self,
2809 char from_c,
2810 PyStringObject *to,
2811 Py_ssize_t maxcount)
2812{
2813 char *self_s, *to_s, *result_s;
2814 char *start, *next, *end;
2815 Py_ssize_t self_len, to_len, result_len;
2816 Py_ssize_t count, product;
2817 PyStringObject *result;
2818
2819 self_s = PyString_AS_STRING(self);
2820 self_len = PyString_GET_SIZE(self);
2821
2822 count = countchar(self_s, self_len, from_c);
2823 if (count > maxcount)
2824 count = maxcount;
2825
2826 if (count == 0) {
2827 /* no matches, return unchanged */
2828 return return_self(self);
2829 }
2830
2831 to_s = PyString_AS_STRING(to);
2832 to_len = PyString_GET_SIZE(to);
2833
2834 /* use the difference between current and new, hence the "-1" */
2835 /* result_len = self_len + count * (to_len-1) */
2836 product = count * (to_len-1);
2837 if (product / (to_len-1) != count) {
2838 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2839 return NULL;
2840 }
2841 result_len = self_len + product;
2842 if (result_len < 0) {
2843 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2844 return NULL;
2845 }
2846
2847 if ( (result = (PyStringObject *)
2848 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2849 return NULL;
2850 result_s = PyString_AS_STRING(result);
2851
2852 start = self_s;
2853 end = self_s + self_len;
2854 while (count-- > 0) {
2855 next = findchar(start, end-start, from_c);
2856 if (next == NULL)
2857 break;
2858
2859 if (next == start) {
2860 /* replace with the 'to' */
2861 memcpy(result_s, to_s, to_len);
2862 result_s += to_len;
2863 start += 1;
2864 } else {
2865 /* copy the unchanged old then the 'to' */
2866 memcpy(result_s, start, next-start);
2867 result_s += (next-start);
2868 memcpy(result_s, to_s, to_len);
2869 result_s += to_len;
2870 start = next+1;
2871 }
2872 }
2873 /* Copy the remainder of the remaining string */
2874 memcpy(result_s, start, end-start);
2875
2876 return result;
2877}
2878
2879/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002880Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002881replace_substring(PyStringObject *self,
2882 PyStringObject *from,
2883 PyStringObject *to,
2884 Py_ssize_t maxcount) {
2885 char *self_s, *from_s, *to_s, *result_s;
2886 char *start, *next, *end;
2887 Py_ssize_t self_len, from_len, to_len, result_len;
2888 Py_ssize_t count, offset, product;
2889 PyStringObject *result;
2890
2891 self_s = PyString_AS_STRING(self);
2892 self_len = PyString_GET_SIZE(self);
2893 from_s = PyString_AS_STRING(from);
2894 from_len = PyString_GET_SIZE(from);
2895
2896 count = countstring(self_s, self_len,
2897 from_s, from_len,
2898 0, self_len, FORWARD);
2899 if (count > maxcount)
2900 count = maxcount;
2901
2902 if (count == 0) {
2903 /* no matches, return unchanged */
2904 return return_self(self);
2905 }
2906
2907 to_s = PyString_AS_STRING(to);
2908 to_len = PyString_GET_SIZE(to);
2909
2910 /* Check for overflow */
2911 /* result_len = self_len + count * (to_len-from_len) */
2912 product = count * (to_len-from_len);
2913 if (product / (to_len-from_len) != count) {
2914 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2915 return NULL;
2916 }
2917 result_len = self_len + product;
2918 if (result_len < 0) {
2919 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2920 return NULL;
2921 }
2922
2923 if ( (result = (PyStringObject *)
2924 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2925 return NULL;
2926 result_s = PyString_AS_STRING(result);
2927
2928 start = self_s;
2929 end = self_s + self_len;
2930 while (count-- > 0) {
2931 offset = findstring(start, end-start,
2932 from_s, from_len,
2933 0, end-start, FORWARD);
2934 if (offset == -1)
2935 break;
2936 next = start+offset;
2937 if (next == start) {
2938 /* replace with the 'to' */
2939 memcpy(result_s, to_s, to_len);
2940 result_s += to_len;
2941 start += from_len;
2942 } else {
2943 /* copy the unchanged old then the 'to' */
2944 memcpy(result_s, start, next-start);
2945 result_s += (next-start);
2946 memcpy(result_s, to_s, to_len);
2947 result_s += to_len;
2948 start = next+from_len;
2949 }
2950 }
2951 /* Copy the remainder of the remaining string */
2952 memcpy(result_s, start, end-start);
2953
2954 return result;
2955}
2956
2957
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002958Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002959replace(PyStringObject *self,
2960 PyStringObject *from,
2961 PyStringObject *to,
2962 Py_ssize_t maxcount)
2963{
2964 Py_ssize_t from_len, to_len;
2965
2966 if (maxcount < 0) {
2967 maxcount = PY_SSIZE_T_MAX;
2968 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2969 /* nothing to do; return the original string */
2970 return return_self(self);
2971 }
2972
2973 from_len = PyString_GET_SIZE(from);
2974 to_len = PyString_GET_SIZE(to);
2975
2976 if (maxcount == 0 ||
2977 (from_len == 0 && to_len == 0)) {
2978 /* nothing to do; return the original string */
2979 return return_self(self);
2980 }
2981
2982 /* Handle zero-length special cases */
2983
2984 if (from_len == 0) {
2985 /* insert the 'to' string everywhere. */
2986 /* >>> "Python".replace("", ".") */
2987 /* '.P.y.t.h.o.n.' */
2988 return replace_interleave(self, to, maxcount);
2989 }
2990
2991 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2992 /* point for an empty self string to generate a non-empty string */
2993 /* Special case so the remaining code always gets a non-empty string */
2994 if (PyString_GET_SIZE(self) == 0) {
2995 return return_self(self);
2996 }
2997
2998 if (to_len == 0) {
2999 /* delete all occurances of 'from' string */
3000 if (from_len == 1) {
3001 return replace_delete_single_character(
3002 self, PyString_AS_STRING(from)[0], maxcount);
3003 } else {
3004 return replace_delete_substring(self, from, maxcount);
3005 }
3006 }
3007
3008 /* Handle special case where both strings have the same length */
3009
3010 if (from_len == to_len) {
3011 if (from_len == 1) {
3012 return replace_single_character_in_place(
3013 self,
3014 PyString_AS_STRING(from)[0],
3015 PyString_AS_STRING(to)[0],
3016 maxcount);
3017 } else {
3018 return replace_substring_in_place(
3019 self, from, to, maxcount);
3020 }
3021 }
3022
3023 /* Otherwise use the more generic algorithms */
3024 if (from_len == 1) {
3025 return replace_single_character(self, PyString_AS_STRING(from)[0],
3026 to, maxcount);
3027 } else {
3028 /* len('from')>=2, len('to')>=1 */
3029 return replace_substring(self, from, to, maxcount);
3030 }
3031}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003032
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003033PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003034"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003035\n\
3036Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003037old replaced by new. If the optional argument count is\n\
3038given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003039
3040static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003041string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003042{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003043 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003044 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003045 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003046 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003047
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003048 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003049 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003050
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003051 if (PyString_Check(from)) {
3052 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003053 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003054#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003055 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003056 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003058#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003059 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003060 return NULL;
3061
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003062 if (PyString_Check(to)) {
3063 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003064 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003065#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003066 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003067 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003068 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003069#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003070 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071 return NULL;
3072
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003073 return (PyObject *)replace((PyStringObject *) self,
3074 (PyStringObject *) from,
3075 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003076}
3077
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003078/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003079
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003080PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003081"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003082\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003083Return True if S starts with the specified prefix, False otherwise.\n\
3084With optional start, test S beginning at that position.\n\
3085With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003086
3087static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003088string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003089{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003091 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003092 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003093 Py_ssize_t plen;
3094 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003095 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003096 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003097
Guido van Rossumc6821402000-05-08 14:08:05 +00003098 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3099 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003100 return NULL;
3101 if (PyString_Check(subobj)) {
3102 prefix = PyString_AS_STRING(subobj);
3103 plen = PyString_GET_SIZE(subobj);
3104 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003105#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003106 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003107 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003108 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003109 subobj, start, end, -1);
3110 if (rc == -1)
3111 return NULL;
3112 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003113 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003114 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003115#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003116 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003117 return NULL;
3118
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003119 string_adjust_indices(&start, &end, len);
3120
3121 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003122 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003123
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003124 if (end-start >= plen)
3125 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3126 else
3127 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003128}
3129
3130
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003131PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003132"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003133\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003134Return True if S ends with the specified suffix, False otherwise.\n\
3135With optional start, test S beginning at that position.\n\
3136With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137
3138static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003139string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003140{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003141 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003142 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003144 Py_ssize_t slen;
3145 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003146 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003147 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003148
Guido van Rossumc6821402000-05-08 14:08:05 +00003149 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3150 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003151 return NULL;
3152 if (PyString_Check(subobj)) {
3153 suffix = PyString_AS_STRING(subobj);
3154 slen = PyString_GET_SIZE(subobj);
3155 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003156#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003157 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003158 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003159 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003160 subobj, start, end, +1);
3161 if (rc == -1)
3162 return NULL;
3163 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003164 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003165 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003166#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003168 return NULL;
3169
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003170 string_adjust_indices(&start, &end, len);
3171
3172 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003173 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003174
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003175 if (end-slen > start)
3176 start = end - slen;
3177 if (end-start >= slen)
3178 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3179 else
3180 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003181}
3182
3183
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003184PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003185"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003186\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003187Encodes S using the codec registered for encoding. encoding defaults\n\
3188to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003189handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003190a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3191'xmlcharrefreplace' as well as any other name registered with\n\
3192codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003193
3194static PyObject *
3195string_encode(PyStringObject *self, PyObject *args)
3196{
3197 char *encoding = NULL;
3198 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003199 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003200
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003201 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3202 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003203 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003204 if (v == NULL)
3205 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003206 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3207 PyErr_Format(PyExc_TypeError,
3208 "encoder did not return a string/unicode object "
3209 "(type=%.400s)",
3210 v->ob_type->tp_name);
3211 Py_DECREF(v);
3212 return NULL;
3213 }
3214 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003215
3216 onError:
3217 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003218}
3219
3220
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003221PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003222"S.decode([encoding[,errors]]) -> object\n\
3223\n\
3224Decodes S using the codec registered for encoding. encoding defaults\n\
3225to the default encoding. errors may be given to set a different error\n\
3226handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003227a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3228as well as any other name registerd with codecs.register_error that is\n\
3229able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003230
3231static PyObject *
3232string_decode(PyStringObject *self, PyObject *args)
3233{
3234 char *encoding = NULL;
3235 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003236 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003237
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003238 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3239 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003240 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003241 if (v == NULL)
3242 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003243 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3244 PyErr_Format(PyExc_TypeError,
3245 "decoder did not return a string/unicode object "
3246 "(type=%.400s)",
3247 v->ob_type->tp_name);
3248 Py_DECREF(v);
3249 return NULL;
3250 }
3251 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003252
3253 onError:
3254 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003255}
3256
3257
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003258PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003259"S.expandtabs([tabsize]) -> string\n\
3260\n\
3261Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003262If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003263
3264static PyObject*
3265string_expandtabs(PyStringObject *self, PyObject *args)
3266{
3267 const char *e, *p;
3268 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003269 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003270 PyObject *u;
3271 int tabsize = 8;
3272
3273 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3274 return NULL;
3275
Thomas Wouters7e474022000-07-16 12:04:32 +00003276 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003277 i = j = 0;
3278 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3279 for (p = PyString_AS_STRING(self); p < e; p++)
3280 if (*p == '\t') {
3281 if (tabsize > 0)
3282 j += tabsize - (j % tabsize);
3283 }
3284 else {
3285 j++;
3286 if (*p == '\n' || *p == '\r') {
3287 i += j;
3288 j = 0;
3289 }
3290 }
3291
3292 /* Second pass: create output string and fill it */
3293 u = PyString_FromStringAndSize(NULL, i + j);
3294 if (!u)
3295 return NULL;
3296
3297 j = 0;
3298 q = PyString_AS_STRING(u);
3299
3300 for (p = PyString_AS_STRING(self); p < e; p++)
3301 if (*p == '\t') {
3302 if (tabsize > 0) {
3303 i = tabsize - (j % tabsize);
3304 j += i;
3305 while (i--)
3306 *q++ = ' ';
3307 }
3308 }
3309 else {
3310 j++;
3311 *q++ = *p;
3312 if (*p == '\n' || *p == '\r')
3313 j = 0;
3314 }
3315
3316 return u;
3317}
3318
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003319Py_LOCAL(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003320pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003321{
3322 PyObject *u;
3323
3324 if (left < 0)
3325 left = 0;
3326 if (right < 0)
3327 right = 0;
3328
Tim Peters8fa5dd02001-09-12 02:18:30 +00003329 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003330 Py_INCREF(self);
3331 return (PyObject *)self;
3332 }
3333
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003334 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003335 left + PyString_GET_SIZE(self) + right);
3336 if (u) {
3337 if (left)
3338 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003339 memcpy(PyString_AS_STRING(u) + left,
3340 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003341 PyString_GET_SIZE(self));
3342 if (right)
3343 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3344 fill, right);
3345 }
3346
3347 return u;
3348}
3349
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003350PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003351"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003352"\n"
3353"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003354"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003355
3356static PyObject *
3357string_ljust(PyStringObject *self, PyObject *args)
3358{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003359 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003360 char fillchar = ' ';
3361
Thomas Wouters4abb3662006-04-19 14:50:15 +00003362 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003363 return NULL;
3364
Tim Peters8fa5dd02001-09-12 02:18:30 +00003365 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003366 Py_INCREF(self);
3367 return (PyObject*) self;
3368 }
3369
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003370 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003371}
3372
3373
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003374PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003375"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003376"\n"
3377"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003378"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003379
3380static PyObject *
3381string_rjust(PyStringObject *self, PyObject *args)
3382{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003383 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003384 char fillchar = ' ';
3385
Thomas Wouters4abb3662006-04-19 14:50:15 +00003386 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387 return NULL;
3388
Tim Peters8fa5dd02001-09-12 02:18:30 +00003389 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003390 Py_INCREF(self);
3391 return (PyObject*) self;
3392 }
3393
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003394 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003395}
3396
3397
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003398PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003399"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003400"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003401"Return S centered in a string of length width. Padding is\n"
3402"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003403
3404static PyObject *
3405string_center(PyStringObject *self, PyObject *args)
3406{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003407 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003408 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003409 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003410
Thomas Wouters4abb3662006-04-19 14:50:15 +00003411 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412 return NULL;
3413
Tim Peters8fa5dd02001-09-12 02:18:30 +00003414 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003415 Py_INCREF(self);
3416 return (PyObject*) self;
3417 }
3418
3419 marg = width - PyString_GET_SIZE(self);
3420 left = marg / 2 + (marg & width & 1);
3421
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003422 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003423}
3424
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003425PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003426"S.zfill(width) -> string\n"
3427"\n"
3428"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003429"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003430
3431static PyObject *
3432string_zfill(PyStringObject *self, PyObject *args)
3433{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003434 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003435 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003436 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003437 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003438
Thomas Wouters4abb3662006-04-19 14:50:15 +00003439 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003440 return NULL;
3441
3442 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003443 if (PyString_CheckExact(self)) {
3444 Py_INCREF(self);
3445 return (PyObject*) self;
3446 }
3447 else
3448 return PyString_FromStringAndSize(
3449 PyString_AS_STRING(self),
3450 PyString_GET_SIZE(self)
3451 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003452 }
3453
3454 fill = width - PyString_GET_SIZE(self);
3455
3456 s = pad(self, fill, 0, '0');
3457
3458 if (s == NULL)
3459 return NULL;
3460
3461 p = PyString_AS_STRING(s);
3462 if (p[fill] == '+' || p[fill] == '-') {
3463 /* move sign to beginning of string */
3464 p[0] = p[fill];
3465 p[fill] = '0';
3466 }
3467
3468 return (PyObject*) s;
3469}
3470
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003471PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003472"S.isspace() -> bool\n\
3473\n\
3474Return True if all characters in S are whitespace\n\
3475and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003476
3477static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003478string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003479{
Fred Drakeba096332000-07-09 07:04:36 +00003480 register const unsigned char *p
3481 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003482 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003483
Guido van Rossum4c08d552000-03-10 22:55:18 +00003484 /* Shortcut for single character strings */
3485 if (PyString_GET_SIZE(self) == 1 &&
3486 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003487 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003488
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003489 /* Special case for empty strings */
3490 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003491 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003492
Guido van Rossum4c08d552000-03-10 22:55:18 +00003493 e = p + PyString_GET_SIZE(self);
3494 for (; p < e; p++) {
3495 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003496 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003497 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003498 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003499}
3500
3501
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003502PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003503"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003504\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003505Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003506and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003507
3508static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003509string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003510{
Fred Drakeba096332000-07-09 07:04:36 +00003511 register const unsigned char *p
3512 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003513 register const unsigned char *e;
3514
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003515 /* Shortcut for single character strings */
3516 if (PyString_GET_SIZE(self) == 1 &&
3517 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003518 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003519
3520 /* Special case for empty strings */
3521 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003522 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003523
3524 e = p + PyString_GET_SIZE(self);
3525 for (; p < e; p++) {
3526 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003527 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003528 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003529 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003530}
3531
3532
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003533PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003534"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003535\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003536Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003537and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003538
3539static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003540string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003541{
Fred Drakeba096332000-07-09 07:04:36 +00003542 register const unsigned char *p
3543 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003544 register const unsigned char *e;
3545
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003546 /* Shortcut for single character strings */
3547 if (PyString_GET_SIZE(self) == 1 &&
3548 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003549 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003550
3551 /* Special case for empty strings */
3552 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003553 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003554
3555 e = p + PyString_GET_SIZE(self);
3556 for (; p < e; p++) {
3557 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003558 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003559 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003560 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003561}
3562
3563
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003564PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003565"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003567Return True if all characters in S are digits\n\
3568and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003569
3570static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003571string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003572{
Fred Drakeba096332000-07-09 07:04:36 +00003573 register const unsigned char *p
3574 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003575 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003576
Guido van Rossum4c08d552000-03-10 22:55:18 +00003577 /* Shortcut for single character strings */
3578 if (PyString_GET_SIZE(self) == 1 &&
3579 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003581
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003582 /* Special case for empty strings */
3583 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003584 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003585
Guido van Rossum4c08d552000-03-10 22:55:18 +00003586 e = p + PyString_GET_SIZE(self);
3587 for (; p < e; p++) {
3588 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003589 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003590 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003591 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003592}
3593
3594
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003595PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003596"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003597\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003598Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003599at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003600
3601static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003602string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003603{
Fred Drakeba096332000-07-09 07:04:36 +00003604 register const unsigned char *p
3605 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003606 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003607 int cased;
3608
Guido van Rossum4c08d552000-03-10 22:55:18 +00003609 /* Shortcut for single character strings */
3610 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003613 /* Special case for empty strings */
3614 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003615 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003616
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617 e = p + PyString_GET_SIZE(self);
3618 cased = 0;
3619 for (; p < e; p++) {
3620 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003621 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622 else if (!cased && islower(*p))
3623 cased = 1;
3624 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003625 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003626}
3627
3628
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003629PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003630"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003631\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003632Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003633at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634
3635static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003636string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003637{
Fred Drakeba096332000-07-09 07:04:36 +00003638 register const unsigned char *p
3639 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003640 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641 int cased;
3642
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643 /* Shortcut for single character strings */
3644 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003645 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003647 /* Special case for empty strings */
3648 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003649 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003650
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651 e = p + PyString_GET_SIZE(self);
3652 cased = 0;
3653 for (; p < e; p++) {
3654 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003655 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656 else if (!cased && isupper(*p))
3657 cased = 1;
3658 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003659 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660}
3661
3662
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003663PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003664"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003665\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003666Return True if S is a titlecased string and there is at least one\n\
3667character in S, i.e. uppercase characters may only follow uncased\n\
3668characters and lowercase characters only cased ones. Return False\n\
3669otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670
3671static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003672string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003673{
Fred Drakeba096332000-07-09 07:04:36 +00003674 register const unsigned char *p
3675 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003676 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677 int cased, previous_is_cased;
3678
Guido van Rossum4c08d552000-03-10 22:55:18 +00003679 /* Shortcut for single character strings */
3680 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003681 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003683 /* Special case for empty strings */
3684 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003685 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003686
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687 e = p + PyString_GET_SIZE(self);
3688 cased = 0;
3689 previous_is_cased = 0;
3690 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003691 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692
3693 if (isupper(ch)) {
3694 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003695 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696 previous_is_cased = 1;
3697 cased = 1;
3698 }
3699 else if (islower(ch)) {
3700 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702 previous_is_cased = 1;
3703 cased = 1;
3704 }
3705 else
3706 previous_is_cased = 0;
3707 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003708 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709}
3710
3711
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003712PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003713"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714\n\
3715Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003716Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003717is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003718
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719static PyObject*
3720string_splitlines(PyStringObject *self, PyObject *args)
3721{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003722 register Py_ssize_t i;
3723 register Py_ssize_t j;
3724 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003725 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726 PyObject *list;
3727 PyObject *str;
3728 char *data;
3729
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003730 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731 return NULL;
3732
3733 data = PyString_AS_STRING(self);
3734 len = PyString_GET_SIZE(self);
3735
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736 list = PyList_New(0);
3737 if (!list)
3738 goto onError;
3739
3740 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003741 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003742
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743 /* Find a line and append it */
3744 while (i < len && data[i] != '\n' && data[i] != '\r')
3745 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746
3747 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003748 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003749 if (i < len) {
3750 if (data[i] == '\r' && i + 1 < len &&
3751 data[i+1] == '\n')
3752 i += 2;
3753 else
3754 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003755 if (keepends)
3756 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003758 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003759 j = i;
3760 }
3761 if (j < len) {
3762 SPLIT_APPEND(data, j, len);
3763 }
3764
3765 return list;
3766
3767 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003768 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769 return NULL;
3770}
3771
3772#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003773#undef SPLIT_ADD
3774#undef MAX_PREALLOC
3775#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003776
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003777static PyObject *
3778string_getnewargs(PyStringObject *v)
3779{
3780 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3781}
3782
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003783
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003784static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003785string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786 /* Counterparts of the obsolete stropmodule functions; except
3787 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003788 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3789 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003790 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003791 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3792 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003793 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3794 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3795 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3796 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3797 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3798 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3799 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003800 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3801 capitalize__doc__},
3802 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3803 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3804 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003805 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003806 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3807 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3808 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3809 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3810 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3811 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3812 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3813 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3814 startswith__doc__},
3815 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3816 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3817 swapcase__doc__},
3818 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3819 translate__doc__},
3820 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3821 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3822 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3823 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3824 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3825 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3826 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3827 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3828 expandtabs__doc__},
3829 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3830 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003831 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003832 {NULL, NULL} /* sentinel */
3833};
3834
Jeremy Hylton938ace62002-07-17 16:30:39 +00003835static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003836str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3837
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003838static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003839string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003840{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003841 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003842 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003843
Guido van Rossumae960af2001-08-30 03:11:59 +00003844 if (type != &PyString_Type)
3845 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003846 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3847 return NULL;
3848 if (x == NULL)
3849 return PyString_FromString("");
3850 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003851}
3852
Guido van Rossumae960af2001-08-30 03:11:59 +00003853static PyObject *
3854str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3855{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003856 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003857 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003858
3859 assert(PyType_IsSubtype(type, &PyString_Type));
3860 tmp = string_new(&PyString_Type, args, kwds);
3861 if (tmp == NULL)
3862 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003863 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003864 n = PyString_GET_SIZE(tmp);
3865 pnew = type->tp_alloc(type, n);
3866 if (pnew != NULL) {
3867 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003868 ((PyStringObject *)pnew)->ob_shash =
3869 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003870 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003871 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003872 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003873 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003874}
3875
Guido van Rossumcacfc072002-05-24 19:01:59 +00003876static PyObject *
3877basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3878{
3879 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003880 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003881 return NULL;
3882}
3883
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003884static PyObject *
3885string_mod(PyObject *v, PyObject *w)
3886{
3887 if (!PyString_Check(v)) {
3888 Py_INCREF(Py_NotImplemented);
3889 return Py_NotImplemented;
3890 }
3891 return PyString_Format(v, w);
3892}
3893
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003894PyDoc_STRVAR(basestring_doc,
3895"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003896
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003897static PyNumberMethods string_as_number = {
3898 0, /*nb_add*/
3899 0, /*nb_subtract*/
3900 0, /*nb_multiply*/
3901 0, /*nb_divide*/
3902 string_mod, /*nb_remainder*/
3903};
3904
3905
Guido van Rossumcacfc072002-05-24 19:01:59 +00003906PyTypeObject PyBaseString_Type = {
3907 PyObject_HEAD_INIT(&PyType_Type)
3908 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003909 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003910 0,
3911 0,
3912 0, /* tp_dealloc */
3913 0, /* tp_print */
3914 0, /* tp_getattr */
3915 0, /* tp_setattr */
3916 0, /* tp_compare */
3917 0, /* tp_repr */
3918 0, /* tp_as_number */
3919 0, /* tp_as_sequence */
3920 0, /* tp_as_mapping */
3921 0, /* tp_hash */
3922 0, /* tp_call */
3923 0, /* tp_str */
3924 0, /* tp_getattro */
3925 0, /* tp_setattro */
3926 0, /* tp_as_buffer */
3927 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3928 basestring_doc, /* tp_doc */
3929 0, /* tp_traverse */
3930 0, /* tp_clear */
3931 0, /* tp_richcompare */
3932 0, /* tp_weaklistoffset */
3933 0, /* tp_iter */
3934 0, /* tp_iternext */
3935 0, /* tp_methods */
3936 0, /* tp_members */
3937 0, /* tp_getset */
3938 &PyBaseObject_Type, /* tp_base */
3939 0, /* tp_dict */
3940 0, /* tp_descr_get */
3941 0, /* tp_descr_set */
3942 0, /* tp_dictoffset */
3943 0, /* tp_init */
3944 0, /* tp_alloc */
3945 basestring_new, /* tp_new */
3946 0, /* tp_free */
3947};
3948
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003949PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003950"str(object) -> string\n\
3951\n\
3952Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003953If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003954
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003955PyTypeObject PyString_Type = {
3956 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003957 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003958 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003959 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003960 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00003961 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003962 (printfunc)string_print, /* tp_print */
3963 0, /* tp_getattr */
3964 0, /* tp_setattr */
3965 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00003966 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003967 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003968 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003969 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003970 (hashfunc)string_hash, /* tp_hash */
3971 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00003972 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003973 PyObject_GenericGetAttr, /* tp_getattro */
3974 0, /* tp_setattro */
3975 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00003976 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003977 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003978 string_doc, /* tp_doc */
3979 0, /* tp_traverse */
3980 0, /* tp_clear */
3981 (richcmpfunc)string_richcompare, /* tp_richcompare */
3982 0, /* tp_weaklistoffset */
3983 0, /* tp_iter */
3984 0, /* tp_iternext */
3985 string_methods, /* tp_methods */
3986 0, /* tp_members */
3987 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003988 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003989 0, /* tp_dict */
3990 0, /* tp_descr_get */
3991 0, /* tp_descr_set */
3992 0, /* tp_dictoffset */
3993 0, /* tp_init */
3994 0, /* tp_alloc */
3995 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003996 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003997};
3998
3999void
Fred Drakeba096332000-07-09 07:04:36 +00004000PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004001{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004002 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004003 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004004 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004005 if (w == NULL || !PyString_Check(*pv)) {
4006 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004007 *pv = NULL;
4008 return;
4009 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004010 v = string_concat((PyStringObject *) *pv, w);
4011 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004012 *pv = v;
4013}
4014
Guido van Rossum013142a1994-08-30 08:19:36 +00004015void
Fred Drakeba096332000-07-09 07:04:36 +00004016PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004017{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004018 PyString_Concat(pv, w);
4019 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004020}
4021
4022
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004023/* The following function breaks the notion that strings are immutable:
4024 it changes the size of a string. We get away with this only if there
4025 is only one module referencing the object. You can also think of it
4026 as creating a new string object and destroying the old one, only
4027 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004028 already be known to some other part of the code...
4029 Note that if there's not enough memory to resize the string, the original
4030 string object at *pv is deallocated, *pv is set to NULL, an "out of
4031 memory" exception is set, and -1 is returned. Else (on success) 0 is
4032 returned, and the value in *pv may or may not be the same as on input.
4033 As always, an extra byte is allocated for a trailing \0 byte (newsize
4034 does *not* include that), and a trailing \0 byte is stored.
4035*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004036
4037int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004038_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004039{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004040 register PyObject *v;
4041 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004042 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004043 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4044 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004045 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004046 Py_DECREF(v);
4047 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004048 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004049 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004050 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004051 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004052 _Py_ForgetReference(v);
4053 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004054 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004055 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004056 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004057 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004058 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004059 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004060 _Py_NewReference(*pv);
4061 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004062 sv->ob_size = newsize;
4063 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004064 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004065 return 0;
4066}
Guido van Rossume5372401993-03-16 12:15:04 +00004067
4068/* Helpers for formatstring */
4069
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004070Py_LOCAL(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004071getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004072{
Thomas Wouters977485d2006-02-16 15:59:12 +00004073 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004074 if (argidx < arglen) {
4075 (*p_argidx)++;
4076 if (arglen < 0)
4077 return args;
4078 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004079 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004080 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004081 PyErr_SetString(PyExc_TypeError,
4082 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004083 return NULL;
4084}
4085
/* Format flag bits, one per conversion flag character:
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 * Each flag is a distinct bit so that they can be OR'ed into one int.
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
4098
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004099Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004100formatfloat(char *buf, size_t buflen, int flags,
4101 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004102{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004103 /* fmt = '%#.' + `prec` + `type`
4104 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004105 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004106 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004107 x = PyFloat_AsDouble(v);
4108 if (x == -1.0 && PyErr_Occurred()) {
4109 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004110 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004111 }
Guido van Rossume5372401993-03-16 12:15:04 +00004112 if (prec < 0)
4113 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004114 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4115 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004116 /* Worst case length calc to ensure no buffer overrun:
4117
4118 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004119 fmt = %#.<prec>g
4120 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004121 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004122 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004123
4124 'f' formats:
4125 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4126 len = 1 + 50 + 1 + prec = 52 + prec
4127
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004128 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004129 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004130
4131 */
4132 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4133 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004134 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004135 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004136 return -1;
4137 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004138 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4139 (flags&F_ALT) ? "#" : "",
4140 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004141 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004142 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004143}
4144
Tim Peters38fd5b62000-09-21 05:43:11 +00004145/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4146 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4147 * Python's regular ints.
4148 * Return value: a new PyString*, or NULL if error.
4149 * . *pbuf is set to point into it,
4150 * *plen set to the # of chars following that.
4151 * Caller must decref it when done using pbuf.
4152 * The string starting at *pbuf is of the form
4153 * "-"? ("0x" | "0X")? digit+
4154 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004155 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004156 * There will be at least prec digits, zero-filled on the left if
4157 * necessary to get that many.
4158 * val object to be converted
4159 * flags bitmask of format flags; only F_ALT is looked at
4160 * prec minimum number of digits; 0-fill on left if needed
4161 * type a character in [duoxX]; u acts the same as d
4162 *
4163 * CAUTION: o, x and X conversions on regular ints can never
4164 * produce a '-' sign, but can for Python's unbounded ints.
4165 */
4166PyObject*
4167_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4168 char **pbuf, int *plen)
4169{
4170 PyObject *result = NULL;
4171 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004172 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004173 int sign; /* 1 if '-', else 0 */
4174 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004175 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004176 int numdigits; /* len == numnondigits + numdigits */
4177 int numnondigits = 0;
4178
4179 switch (type) {
4180 case 'd':
4181 case 'u':
4182 result = val->ob_type->tp_str(val);
4183 break;
4184 case 'o':
4185 result = val->ob_type->tp_as_number->nb_oct(val);
4186 break;
4187 case 'x':
4188 case 'X':
4189 numnondigits = 2;
4190 result = val->ob_type->tp_as_number->nb_hex(val);
4191 break;
4192 default:
4193 assert(!"'type' not in [duoxX]");
4194 }
4195 if (!result)
4196 return NULL;
4197
4198 /* To modify the string in-place, there can only be one reference. */
4199 if (result->ob_refcnt != 1) {
4200 PyErr_BadInternalCall();
4201 return NULL;
4202 }
4203 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004204 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004205 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004206 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4207 return NULL;
4208 }
4209 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004210 if (buf[len-1] == 'L') {
4211 --len;
4212 buf[len] = '\0';
4213 }
4214 sign = buf[0] == '-';
4215 numnondigits += sign;
4216 numdigits = len - numnondigits;
4217 assert(numdigits > 0);
4218
Tim Petersfff53252001-04-12 18:38:48 +00004219 /* Get rid of base marker unless F_ALT */
4220 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004221 /* Need to skip 0x, 0X or 0. */
4222 int skipped = 0;
4223 switch (type) {
4224 case 'o':
4225 assert(buf[sign] == '0');
4226 /* If 0 is only digit, leave it alone. */
4227 if (numdigits > 1) {
4228 skipped = 1;
4229 --numdigits;
4230 }
4231 break;
4232 case 'x':
4233 case 'X':
4234 assert(buf[sign] == '0');
4235 assert(buf[sign + 1] == 'x');
4236 skipped = 2;
4237 numnondigits -= 2;
4238 break;
4239 }
4240 if (skipped) {
4241 buf += skipped;
4242 len -= skipped;
4243 if (sign)
4244 buf[0] = '-';
4245 }
4246 assert(len == numnondigits + numdigits);
4247 assert(numdigits > 0);
4248 }
4249
4250 /* Fill with leading zeroes to meet minimum width. */
4251 if (prec > numdigits) {
4252 PyObject *r1 = PyString_FromStringAndSize(NULL,
4253 numnondigits + prec);
4254 char *b1;
4255 if (!r1) {
4256 Py_DECREF(result);
4257 return NULL;
4258 }
4259 b1 = PyString_AS_STRING(r1);
4260 for (i = 0; i < numnondigits; ++i)
4261 *b1++ = *buf++;
4262 for (i = 0; i < prec - numdigits; i++)
4263 *b1++ = '0';
4264 for (i = 0; i < numdigits; i++)
4265 *b1++ = *buf++;
4266 *b1 = '\0';
4267 Py_DECREF(result);
4268 result = r1;
4269 buf = PyString_AS_STRING(result);
4270 len = numnondigits + prec;
4271 }
4272
4273 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004274 if (type == 'X') {
4275 /* Need to convert all lower case letters to upper case.
4276 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004277 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004278 if (buf[i] >= 'a' && buf[i] <= 'x')
4279 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004280 }
4281 *pbuf = buf;
4282 *plen = len;
4283 return result;
4284}
4285
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004286Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004287formatint(char *buf, size_t buflen, int flags,
4288 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004289{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004290 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004291 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4292 + 1 + 1 = 24 */
4293 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004294 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004295 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004296
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004297 x = PyInt_AsLong(v);
4298 if (x == -1 && PyErr_Occurred()) {
4299 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004300 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004301 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004302 if (x < 0 && type == 'u') {
4303 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004304 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004305 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4306 sign = "-";
4307 else
4308 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004309 if (prec < 0)
4310 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004311
4312 if ((flags & F_ALT) &&
4313 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004314 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004315 * of issues that cause pain:
4316 * - when 0 is being converted, the C standard leaves off
4317 * the '0x' or '0X', which is inconsistent with other
4318 * %#x/%#X conversions and inconsistent with Python's
4319 * hex() function
4320 * - there are platforms that violate the standard and
4321 * convert 0 with the '0x' or '0X'
4322 * (Metrowerks, Compaq Tru64)
4323 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004324 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004325 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004326 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004327 * We can achieve the desired consistency by inserting our
4328 * own '0x' or '0X' prefix, and substituting %x/%X in place
4329 * of %#x/%#X.
4330 *
4331 * Note that this is the same approach as used in
4332 * formatint() in unicodeobject.c
4333 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004334 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4335 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004336 }
4337 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004338 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4339 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004340 prec, type);
4341 }
4342
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004343 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4344 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004345 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004346 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004347 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004348 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004349 return -1;
4350 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004351 if (sign[0])
4352 PyOS_snprintf(buf, buflen, fmt, -x);
4353 else
4354 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004355 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004356}
4357
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004358Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004359formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004360{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004361 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004362 if (PyString_Check(v)) {
4363 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004364 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004365 }
4366 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004367 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004368 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004369 }
4370 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004371 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004372}
4373
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   primary expression wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004383
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004384PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004385PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004386{
4387 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004388 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004389 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004390 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004391 PyObject *result, *orig_args;
4392#ifdef Py_USING_UNICODE
4393 PyObject *v, *w;
4394#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004395 PyObject *dict = NULL;
4396 if (format == NULL || !PyString_Check(format) || args == NULL) {
4397 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004398 return NULL;
4399 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004400 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004401 fmt = PyString_AS_STRING(format);
4402 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004403 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004404 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004405 if (result == NULL)
4406 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004407 res = PyString_AsString(result);
4408 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004409 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004410 argidx = 0;
4411 }
4412 else {
4413 arglen = -1;
4414 argidx = -2;
4415 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004416 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4417 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004418 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004419 while (--fmtcnt >= 0) {
4420 if (*fmt != '%') {
4421 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004422 rescnt = fmtcnt + 100;
4423 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004424 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004425 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004426 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004427 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004428 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004429 }
4430 *res++ = *fmt++;
4431 }
4432 else {
4433 /* Got a format specifier */
4434 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004435 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004436 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004437 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004438 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004439 PyObject *v = NULL;
4440 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004441 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004442 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004443 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004444 char formatbuf[FORMATBUFLEN];
4445 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004446#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004447 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004448 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004449#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004450
Guido van Rossumda9c2711996-12-05 21:58:58 +00004451 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004452 if (*fmt == '(') {
4453 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004454 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004455 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004456 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004457
4458 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004459 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004460 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004461 goto error;
4462 }
4463 ++fmt;
4464 --fmtcnt;
4465 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004466 /* Skip over balanced parentheses */
4467 while (pcount > 0 && --fmtcnt >= 0) {
4468 if (*fmt == ')')
4469 --pcount;
4470 else if (*fmt == '(')
4471 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004472 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004473 }
4474 keylen = fmt - keystart - 1;
4475 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004476 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004477 "incomplete format key");
4478 goto error;
4479 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004480 key = PyString_FromStringAndSize(keystart,
4481 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004482 if (key == NULL)
4483 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004484 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004485 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004486 args_owned = 0;
4487 }
4488 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004489 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004490 if (args == NULL) {
4491 goto error;
4492 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004493 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004494 arglen = -1;
4495 argidx = -2;
4496 }
Guido van Rossume5372401993-03-16 12:15:04 +00004497 while (--fmtcnt >= 0) {
4498 switch (c = *fmt++) {
4499 case '-': flags |= F_LJUST; continue;
4500 case '+': flags |= F_SIGN; continue;
4501 case ' ': flags |= F_BLANK; continue;
4502 case '#': flags |= F_ALT; continue;
4503 case '0': flags |= F_ZERO; continue;
4504 }
4505 break;
4506 }
4507 if (c == '*') {
4508 v = getnextarg(args, arglen, &argidx);
4509 if (v == NULL)
4510 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004511 if (!PyInt_Check(v)) {
4512 PyErr_SetString(PyExc_TypeError,
4513 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004514 goto error;
4515 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004516 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004517 if (width < 0) {
4518 flags |= F_LJUST;
4519 width = -width;
4520 }
Guido van Rossume5372401993-03-16 12:15:04 +00004521 if (--fmtcnt >= 0)
4522 c = *fmt++;
4523 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004524 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004525 width = c - '0';
4526 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004527 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004528 if (!isdigit(c))
4529 break;
4530 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004531 PyErr_SetString(
4532 PyExc_ValueError,
4533 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004534 goto error;
4535 }
4536 width = width*10 + (c - '0');
4537 }
4538 }
4539 if (c == '.') {
4540 prec = 0;
4541 if (--fmtcnt >= 0)
4542 c = *fmt++;
4543 if (c == '*') {
4544 v = getnextarg(args, arglen, &argidx);
4545 if (v == NULL)
4546 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004547 if (!PyInt_Check(v)) {
4548 PyErr_SetString(
4549 PyExc_TypeError,
4550 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004551 goto error;
4552 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004553 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004554 if (prec < 0)
4555 prec = 0;
4556 if (--fmtcnt >= 0)
4557 c = *fmt++;
4558 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004559 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004560 prec = c - '0';
4561 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004562 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004563 if (!isdigit(c))
4564 break;
4565 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004566 PyErr_SetString(
4567 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004568 "prec too big");
4569 goto error;
4570 }
4571 prec = prec*10 + (c - '0');
4572 }
4573 }
4574 } /* prec */
4575 if (fmtcnt >= 0) {
4576 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004577 if (--fmtcnt >= 0)
4578 c = *fmt++;
4579 }
4580 }
4581 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004582 PyErr_SetString(PyExc_ValueError,
4583 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004584 goto error;
4585 }
4586 if (c != '%') {
4587 v = getnextarg(args, arglen, &argidx);
4588 if (v == NULL)
4589 goto error;
4590 }
4591 sign = 0;
4592 fill = ' ';
4593 switch (c) {
4594 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004595 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004596 len = 1;
4597 break;
4598 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004599#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004600 if (PyUnicode_Check(v)) {
4601 fmt = fmt_start;
4602 argidx = argidx_start;
4603 goto unicode;
4604 }
Georg Brandld45014b2005-10-01 17:06:00 +00004605#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004606 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004607#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004608 if (temp != NULL && PyUnicode_Check(temp)) {
4609 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004610 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004611 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004612 goto unicode;
4613 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004614#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004615 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004616 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004617 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004618 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004619 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004620 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004621 if (!PyString_Check(temp)) {
4622 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004623 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004624 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004625 goto error;
4626 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004627 pbuf = PyString_AS_STRING(temp);
4628 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004629 if (prec >= 0 && len > prec)
4630 len = prec;
4631 break;
4632 case 'i':
4633 case 'd':
4634 case 'u':
4635 case 'o':
4636 case 'x':
4637 case 'X':
4638 if (c == 'i')
4639 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004640 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004641 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004642 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004643 prec, c, &pbuf, &ilen);
4644 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004645 if (!temp)
4646 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004647 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004648 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004649 else {
4650 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004651 len = formatint(pbuf,
4652 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004653 flags, prec, c, v);
4654 if (len < 0)
4655 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004656 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004657 }
4658 if (flags & F_ZERO)
4659 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004660 break;
4661 case 'e':
4662 case 'E':
4663 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004664 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004665 case 'g':
4666 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004667 if (c == 'F')
4668 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004669 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004670 len = formatfloat(pbuf, sizeof(formatbuf),
4671 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004672 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004673 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004674 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004675 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004676 fill = '0';
4677 break;
4678 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004679#ifdef Py_USING_UNICODE
4680 if (PyUnicode_Check(v)) {
4681 fmt = fmt_start;
4682 argidx = argidx_start;
4683 goto unicode;
4684 }
4685#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004686 pbuf = formatbuf;
4687 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004688 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004689 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004690 break;
4691 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004692 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004693 "unsupported format character '%c' (0x%x) "
4694 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004695 c, c,
4696 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004697 goto error;
4698 }
4699 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004700 if (*pbuf == '-' || *pbuf == '+') {
4701 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004702 len--;
4703 }
4704 else if (flags & F_SIGN)
4705 sign = '+';
4706 else if (flags & F_BLANK)
4707 sign = ' ';
4708 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004709 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004710 }
4711 if (width < len)
4712 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004713 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004714 reslen -= rescnt;
4715 rescnt = width + fmtcnt + 100;
4716 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004717 if (reslen < 0) {
4718 Py_DECREF(result);
4719 return PyErr_NoMemory();
4720 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004721 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004722 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004723 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004724 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004725 }
4726 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004727 if (fill != ' ')
4728 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004729 rescnt--;
4730 if (width > len)
4731 width--;
4732 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004733 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4734 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004735 assert(pbuf[1] == c);
4736 if (fill != ' ') {
4737 *res++ = *pbuf++;
4738 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004739 }
Tim Petersfff53252001-04-12 18:38:48 +00004740 rescnt -= 2;
4741 width -= 2;
4742 if (width < 0)
4743 width = 0;
4744 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004745 }
4746 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004747 do {
4748 --rescnt;
4749 *res++ = fill;
4750 } while (--width > len);
4751 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004752 if (fill == ' ') {
4753 if (sign)
4754 *res++ = sign;
4755 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004756 (c == 'x' || c == 'X')) {
4757 assert(pbuf[0] == '0');
4758 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004759 *res++ = *pbuf++;
4760 *res++ = *pbuf++;
4761 }
4762 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004763 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004764 res += len;
4765 rescnt -= len;
4766 while (--width >= len) {
4767 --rescnt;
4768 *res++ = ' ';
4769 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004770 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004771 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004772 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004773 goto error;
4774 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004775 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004776 } /* '%' */
4777 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004778 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004779 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004780 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004781 goto error;
4782 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004783 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004784 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004785 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004786 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004787 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004788
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004789#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004790 unicode:
4791 if (args_owned) {
4792 Py_DECREF(args);
4793 args_owned = 0;
4794 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004795 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004796 if (PyTuple_Check(orig_args) && argidx > 0) {
4797 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004798 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004799 v = PyTuple_New(n);
4800 if (v == NULL)
4801 goto error;
4802 while (--n >= 0) {
4803 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4804 Py_INCREF(w);
4805 PyTuple_SET_ITEM(v, n, w);
4806 }
4807 args = v;
4808 } else {
4809 Py_INCREF(orig_args);
4810 args = orig_args;
4811 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004812 args_owned = 1;
4813 /* Take what we have of the result and let the Unicode formatting
4814 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004815 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004816 if (_PyString_Resize(&result, rescnt))
4817 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004818 fmtcnt = PyString_GET_SIZE(format) - \
4819 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004820 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4821 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004822 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004823 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004824 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004825 if (v == NULL)
4826 goto error;
4827 /* Paste what we have (result) to what the Unicode formatting
4828 function returned (v) and return the result (or error) */
4829 w = PyUnicode_Concat(result, v);
4830 Py_DECREF(result);
4831 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004832 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004833 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004834#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004835
Guido van Rossume5372401993-03-16 12:15:04 +00004836 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004837 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004838 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004839 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004840 }
Guido van Rossume5372401993-03-16 12:15:04 +00004841 return NULL;
4842}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004843
Guido van Rossum2a61e741997-01-18 07:55:05 +00004844void
Fred Drakeba096332000-07-09 07:04:36 +00004845PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004846{
4847 register PyStringObject *s = (PyStringObject *)(*p);
4848 PyObject *t;
4849 if (s == NULL || !PyString_Check(s))
4850 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004851 /* If it's a string subclass, we don't really know what putting
4852 it in the interned dict might do. */
4853 if (!PyString_CheckExact(s))
4854 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004855 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004856 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004857 if (interned == NULL) {
4858 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004859 if (interned == NULL) {
4860 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004861 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004862 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004863 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004864 t = PyDict_GetItem(interned, (PyObject *)s);
4865 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004866 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004867 Py_DECREF(*p);
4868 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004869 return;
4870 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004871
Armin Rigo79f7ad22004-08-07 19:27:39 +00004872 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004873 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004874 return;
4875 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004876 /* The two references in interned are not counted by refcnt.
4877 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004878 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004879 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004880}
4881
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004882void
4883PyString_InternImmortal(PyObject **p)
4884{
4885 PyString_InternInPlace(p);
4886 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4887 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4888 Py_INCREF(*p);
4889 }
4890}
4891
Guido van Rossum2a61e741997-01-18 07:55:05 +00004892
4893PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004894PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004895{
4896 PyObject *s = PyString_FromString(cp);
4897 if (s == NULL)
4898 return NULL;
4899 PyString_InternInPlace(&s);
4900 return s;
4901}
4902
Guido van Rossum8cf04761997-08-02 02:57:45 +00004903void
Fred Drakeba096332000-07-09 07:04:36 +00004904PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004905{
4906 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004907 for (i = 0; i < UCHAR_MAX + 1; i++) {
4908 Py_XDECREF(characters[i]);
4909 characters[i] = NULL;
4910 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004911 Py_XDECREF(nullstring);
4912 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004913}
Barry Warsawa903ad982001-02-23 16:40:48 +00004914
Barry Warsawa903ad982001-02-23 16:40:48 +00004915void _Py_ReleaseInternedStrings(void)
4916{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004917 PyObject *keys;
4918 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004919 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004920
4921 if (interned == NULL || !PyDict_Check(interned))
4922 return;
4923 keys = PyDict_Keys(interned);
4924 if (keys == NULL || !PyList_Check(keys)) {
4925 PyErr_Clear();
4926 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004927 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004928
4929 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4930 detector, interned strings are not forcibly deallocated; rather, we
4931 give them their stolen references back, and then clear and DECREF
4932 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004933
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004934 fprintf(stderr, "releasing interned strings\n");
4935 n = PyList_GET_SIZE(keys);
4936 for (i = 0; i < n; i++) {
4937 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4938 switch (s->ob_sstate) {
4939 case SSTATE_NOT_INTERNED:
4940 /* XXX Shouldn't happen */
4941 break;
4942 case SSTATE_INTERNED_IMMORTAL:
4943 s->ob_refcnt += 1;
4944 break;
4945 case SSTATE_INTERNED_MORTAL:
4946 s->ob_refcnt += 2;
4947 break;
4948 default:
4949 Py_FatalError("Inconsistent interned string state.");
4950 }
4951 s->ob_sstate = SSTATE_NOT_INTERNED;
4952 }
4953 Py_DECREF(keys);
4954 PyDict_Clear(interned);
4955 Py_DECREF(interned);
4956 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004957}