blob: c881927a42f320346ede47485707b6bd78d5104e [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
/* Statistics counters, compiled in only when COUNT_ALLOCS is defined.  They
   are incremented each time a cached empty string (null_strings) or a cached
   one-character string (one_strings) is returned instead of a new object. */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
/* Cache of one-character string objects, indexed by the character's value
   masked with UCHAR_MAX.  A slot stays NULL until the constructors in this
   file create that one-character string for the first time. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
/* Cached empty string object; NULL until an empty string is first created. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
26
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000027/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000028 For both PyString_FromString() and PyString_FromStringAndSize(), the
29 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000030 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000031
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000032 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000033 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000034
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000035 For PyString_FromStringAndSize(), the parameter the parameter `str' is
36 either NULL or else points to a string containing at least `size' bytes.
37 For PyString_FromStringAndSize(), the string in the `str' parameter does
38 not have to be null-terminated. (Therefore it is safe to construct a
39 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
40 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
41 bytes (setting the last byte to the null terminating character) and you can
42 fill in the data yourself. If `str' is non-NULL then the resulting
43 PyString object must be treated as immutable and you must not fill in nor
44 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000045
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000046 The PyObject member `op->ob_size', which denotes the number of "extra
47 items" in a variable-size object, will contain the number of bytes
48 allocated for string data, not counting the null terminating character. It
49 is therefore equal to the equal to the `size' parameter (for
50 PyString_FromStringAndSize()) or the length of the string in the `str'
51 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000052*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000054PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000055{
Tim Peters9e897f42001-05-09 07:37:07 +000056 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000057 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058 if (size == 0 && (op = nullstring) != NULL) {
59#ifdef COUNT_ALLOCS
60 null_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 if (size == 1 && str != NULL &&
66 (op = characters[*str & UCHAR_MAX]) != NULL)
67 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068#ifdef COUNT_ALLOCS
69 one_strings++;
70#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 Py_INCREF(op);
72 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000075 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000076 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000077 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000078 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000079 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000080 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000081 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (str != NULL)
83 memcpy(op->ob_sval, str, size);
84 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000085 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000086 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000087 PyObject *t = (PyObject *)op;
88 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000089 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000093 PyObject *t = (PyObject *)op;
94 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000095 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000103PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104{
Tim Peters62de65b2001-12-06 20:29:32 +0000105 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000106 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000107
108 assert(str != NULL);
109 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000110 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000111 PyErr_SetString(PyExc_OverflowError,
112 "string is too long for a Python string");
113 return NULL;
114 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 if (size == 0 && (op = nullstring) != NULL) {
116#ifdef COUNT_ALLOCS
117 null_strings++;
118#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000119 Py_INCREF(op);
120 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
123#ifdef COUNT_ALLOCS
124 one_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000130 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000131 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000136 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000137 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000138 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000146 PyObject *t = (PyObject *)op;
147 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000148 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
165 memcpy(count, vargs, sizeof(va_list));
166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
177 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
178 ;
179
Tim Peters8931ff12006-05-13 23:28:20 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 }
254 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
255 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000275 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 memcpy(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
355
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 const char *encoding,
359 const char *errors)
360{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361 PyObject *v, *str;
362
363 str = PyString_FromStringAndSize(s, size);
364 if (str == NULL)
365 return NULL;
366 v = PyString_AsDecodedString(str, encoding, errors);
367 Py_DECREF(str);
368 return v;
369}
370
371PyObject *PyString_AsDecodedObject(PyObject *str,
372 const char *encoding,
373 const char *errors)
374{
375 PyObject *v;
376
377 if (!PyString_Check(str)) {
378 PyErr_BadArgument();
379 goto onError;
380 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382 if (encoding == NULL) {
383#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385#else
386 PyErr_SetString(PyExc_ValueError, "no encoding specified");
387 goto onError;
388#endif
389 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000390
391 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 v = PyCodec_Decode(str, encoding, errors);
393 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000394 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395
396 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000397
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 return NULL;
400}
401
402PyObject *PyString_AsDecodedString(PyObject *str,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v;
407
408 v = PyString_AsDecodedObject(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000412#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000413 /* Convert Unicode to a string using the default encoding */
414 if (PyUnicode_Check(v)) {
415 PyObject *temp = v;
416 v = PyUnicode_AsEncodedString(v, NULL, NULL);
417 Py_DECREF(temp);
418 if (v == NULL)
419 goto onError;
420 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000421#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000422 if (!PyString_Check(v)) {
423 PyErr_Format(PyExc_TypeError,
424 "decoder did not return a string object (type=%.400s)",
425 v->ob_type->tp_name);
426 Py_DECREF(v);
427 goto onError;
428 }
429
430 return v;
431
432 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 return NULL;
434}
435
436PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000437 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 const char *encoding,
439 const char *errors)
440{
441 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000442
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 str = PyString_FromStringAndSize(s, size);
444 if (str == NULL)
445 return NULL;
446 v = PyString_AsEncodedString(str, encoding, errors);
447 Py_DECREF(str);
448 return v;
449}
450
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452 const char *encoding,
453 const char *errors)
454{
455 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000456
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 if (!PyString_Check(str)) {
458 PyErr_BadArgument();
459 goto onError;
460 }
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462 if (encoding == NULL) {
463#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000464 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465#else
466 PyErr_SetString(PyExc_ValueError, "no encoding specified");
467 goto onError;
468#endif
469 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470
471 /* Encode via the codec registry */
472 v = PyCodec_Encode(str, encoding, errors);
473 if (v == NULL)
474 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
476 return v;
477
478 onError:
479 return NULL;
480}
481
482PyObject *PyString_AsEncodedString(PyObject *str,
483 const char *encoding,
484 const char *errors)
485{
486 PyObject *v;
487
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000488 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000489 if (v == NULL)
490 goto onError;
491
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000492#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 /* Convert Unicode to a string using the default encoding */
494 if (PyUnicode_Check(v)) {
495 PyObject *temp = v;
496 v = PyUnicode_AsEncodedString(v, NULL, NULL);
497 Py_DECREF(temp);
498 if (v == NULL)
499 goto onError;
500 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000501#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000502 if (!PyString_Check(v)) {
503 PyErr_Format(PyExc_TypeError,
504 "encoder did not return a string object (type=%.400s)",
505 v->ob_type->tp_name);
506 Py_DECREF(v);
507 goto onError;
508 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000511
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000512 onError:
513 return NULL;
514}
515
Guido van Rossum234f9421993-06-17 12:35:49 +0000516static void
Fred Drakeba096332000-07-09 07:04:36 +0000517string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000518{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000519 switch (PyString_CHECK_INTERNED(op)) {
520 case SSTATE_NOT_INTERNED:
521 break;
522
523 case SSTATE_INTERNED_MORTAL:
524 /* revive dead object temporarily for DelItem */
525 op->ob_refcnt = 3;
526 if (PyDict_DelItem(interned, op) != 0)
527 Py_FatalError(
528 "deletion of interned string failed");
529 break;
530
531 case SSTATE_INTERNED_IMMORTAL:
532 Py_FatalError("Immortal interned string died.");
533
534 default:
535 Py_FatalError("Inconsistent interned string state.");
536 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000537 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000538}
539
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540/* Unescape a backslash-escaped string. If unicode is non-zero,
541 the string is a u-literal. If recode_encoding is non-zero,
542 the string is UTF-8 encoded and should be re-encoded in the
543 specified encoding. */
544
545PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000548 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 const char *recode_encoding)
550{
551 int c;
552 char *p, *buf;
553 const char *end;
554 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000556 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000557 if (v == NULL)
558 return NULL;
559 p = buf = PyString_AsString(v);
560 end = s + len;
561 while (s < end) {
562 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000563 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000564#ifdef Py_USING_UNICODE
565 if (recode_encoding && (*s & 0x80)) {
566 PyObject *u, *w;
567 char *r;
568 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000570 t = s;
571 /* Decode non-ASCII bytes as UTF-8. */
572 while (t < end && (*t & 0x80)) t++;
573 u = PyUnicode_DecodeUTF8(s, t - s, errors);
574 if(!u) goto failed;
575
576 /* Recode them in target encoding. */
577 w = PyUnicode_AsEncodedString(
578 u, recode_encoding, errors);
579 Py_DECREF(u);
580 if (!w) goto failed;
581
582 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000583 assert(PyString_Check(w));
584 r = PyString_AS_STRING(w);
585 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 memcpy(p, r, rn);
587 p += rn;
588 Py_DECREF(w);
589 s = t;
590 } else {
591 *p++ = *s++;
592 }
593#else
594 *p++ = *s++;
595#endif
596 continue;
597 }
598 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000599 if (s==end) {
600 PyErr_SetString(PyExc_ValueError,
601 "Trailing \\ in string");
602 goto failed;
603 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000604 switch (*s++) {
605 /* XXX This assumes ASCII! */
606 case '\n': break;
607 case '\\': *p++ = '\\'; break;
608 case '\'': *p++ = '\''; break;
609 case '\"': *p++ = '\"'; break;
610 case 'b': *p++ = '\b'; break;
611 case 'f': *p++ = '\014'; break; /* FF */
612 case 't': *p++ = '\t'; break;
613 case 'n': *p++ = '\n'; break;
614 case 'r': *p++ = '\r'; break;
615 case 'v': *p++ = '\013'; break; /* VT */
616 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
617 case '0': case '1': case '2': case '3':
618 case '4': case '5': case '6': case '7':
619 c = s[-1] - '0';
620 if ('0' <= *s && *s <= '7') {
621 c = (c<<3) + *s++ - '0';
622 if ('0' <= *s && *s <= '7')
623 c = (c<<3) + *s++ - '0';
624 }
625 *p++ = c;
626 break;
627 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000628 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 && isxdigit(Py_CHARMASK(s[1]))) {
630 unsigned int x = 0;
631 c = Py_CHARMASK(*s);
632 s++;
633 if (isdigit(c))
634 x = c - '0';
635 else if (islower(c))
636 x = 10 + c - 'a';
637 else
638 x = 10 + c - 'A';
639 x = x << 4;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x += c - '0';
644 else if (islower(c))
645 x += 10 + c - 'a';
646 else
647 x += 10 + c - 'A';
648 *p++ = x;
649 break;
650 }
651 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656 if (strcmp(errors, "replace") == 0) {
657 *p++ = '?';
658 } else if (strcmp(errors, "ignore") == 0)
659 /* do nothing */;
660 else {
661 PyErr_Format(PyExc_ValueError,
662 "decoding error; "
663 "unknown error handling code: %.400s",
664 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667#ifndef Py_USING_UNICODE
668 case 'u':
669 case 'U':
670 case 'N':
671 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000672 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 "Unicode escapes not legal "
674 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000675 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677#endif
678 default:
679 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000680 s--;
681 goto non_esc; /* an arbitry number of unescaped
682 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000685 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 return v;
688 failed:
689 Py_DECREF(v);
690 return NULL;
691}
692
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000693/* -------------------------------------------------------------------- */
694/* object api */
695
Martin v. Löwis18e16552006-02-15 17:27:45 +0000696static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697string_getsize(register PyObject *op)
698{
699 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000700 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000701 if (PyString_AsStringAndSize(op, &s, &len))
702 return -1;
703 return len;
704}
705
706static /*const*/ char *
707string_getbuffer(register PyObject *op)
708{
709 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000710 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000711 if (PyString_AsStringAndSize(op, &s, &len))
712 return NULL;
713 return s;
714}
715
Martin v. Löwis18e16552006-02-15 17:27:45 +0000716Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000717PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000719 if (!PyString_Check(op))
720 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000721 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722}
723
724/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000725PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000727 if (!PyString_Check(op))
728 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730}
731
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000732int
733PyString_AsStringAndSize(register PyObject *obj,
734 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000735 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000736{
737 if (s == NULL) {
738 PyErr_BadInternalCall();
739 return -1;
740 }
741
742 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000743#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000744 if (PyUnicode_Check(obj)) {
745 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
746 if (obj == NULL)
747 return -1;
748 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000749 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000750#endif
751 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000752 PyErr_Format(PyExc_TypeError,
753 "expected string or Unicode object, "
754 "%.200s found", obj->ob_type->tp_name);
755 return -1;
756 }
757 }
758
759 *s = PyString_AS_STRING(obj);
760 if (len != NULL)
761 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000762 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000763 PyErr_SetString(PyExc_TypeError,
764 "expected string without null bytes");
765 return -1;
766 }
767 return 0;
768}
769
Fredrik Lundhaf722372006-05-25 17:55:31 +0000770/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000771/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000772
/* Configuration macros for the stringlib headers included just below:
   they map the generic STRINGLIB_* names onto the char element type,
   memcmp, and the PyString accessors/constructor used by this file.
   NOTE(review): the headers themselves are not visible here; confirm
   which of these macros each header actually consumes. */
Fredrik Lundha50d2012006-05-26 17:04:58 +0000773#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000774
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000775#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000776#define STRINGLIB_LEN PyString_GET_SIZE
777#define STRINGLIB_NEW PyString_FromStringAndSize
778#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000779
/* the cached empty string object declared at the top of this file */
Fredrik Lundhb9479482006-05-26 17:22:38 +0000780#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000781
Fredrik Lundha50d2012006-05-26 17:04:58 +0000782#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000783
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000784#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000785#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000786#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000787
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000789static int
Fred Drakeba096332000-07-09 07:04:36 +0000790string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000792 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000795
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000796 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000797 if (! PyString_CheckExact(op)) {
798 int ret;
799 /* A str subclass may have its own __str__ method. */
800 op = (PyStringObject *) PyObject_Str((PyObject *)op);
801 if (op == NULL)
802 return -1;
803 ret = string_print(op, fp, flags);
804 Py_DECREF(op);
805 return ret;
806 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000807 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000808#ifdef __VMS
809 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
810#else
811 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
812#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000813 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815
Thomas Wouters7e474022000-07-16 12:04:32 +0000816 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000818 if (memchr(op->ob_sval, '\'', op->ob_size) &&
819 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 quote = '"';
821
822 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823 for (i = 0; i < op->ob_size; i++) {
824 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000825 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000827 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000828 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000829 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000830 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000831 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000832 fprintf(fp, "\\r");
833 else if (c < ' ' || c >= 0x7f)
834 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000835 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000836 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000838 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000839 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000840}
841
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000842PyObject *
843PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000845 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000846 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000847 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000848 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000849 PyErr_SetString(PyExc_OverflowError,
850 "string is too large to make repr");
851 }
852 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000854 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
856 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000857 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000858 register char c;
859 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000860 int quote;
861
Thomas Wouters7e474022000-07-16 12:04:32 +0000862 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000863 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000864 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000865 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000866 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000867 quote = '"';
868
Tim Peters9161c8b2001-12-03 01:55:38 +0000869 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000870 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000872 /* There's at least enough room for a hex escape
873 and a closing quote. */
874 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000875 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000876 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000878 else if (c == '\t')
879 *p++ = '\\', *p++ = 't';
880 else if (c == '\n')
881 *p++ = '\\', *p++ = 'n';
882 else if (c == '\r')
883 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000884 else if (c < ' ' || c >= 0x7f) {
885 /* For performance, we don't want to call
886 PyOS_snprintf here (extra layers of
887 function call). */
888 sprintf(p, "\\x%02x", c & 0xff);
889 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000890 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000891 else
892 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000894 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000895 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000897 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000898 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000899 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901}
902
Guido van Rossum189f1df2001-05-01 16:51:53 +0000903static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000904string_repr(PyObject *op)
905{
906 return PyString_Repr(op, 1);
907}
908
909static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000910string_str(PyObject *s)
911{
Tim Petersc9933152001-10-16 20:18:24 +0000912 assert(PyString_Check(s));
913 if (PyString_CheckExact(s)) {
914 Py_INCREF(s);
915 return s;
916 }
917 else {
918 /* Subtype -- return genuine string with the same value. */
919 PyStringObject *t = (PyStringObject *) s;
920 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
921 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000922}
923
Martin v. Löwis18e16552006-02-15 17:27:45 +0000924static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000925string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000926{
927 return a->ob_size;
928}
929
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000931string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000932{
Andrew Dalke598710c2006-05-25 18:18:39 +0000933 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 register PyStringObject *op;
935 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000936#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000937 if (PyUnicode_Check(bb))
938 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000939#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000940 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000941 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000942 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943 return NULL;
944 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000947 if ((a->ob_size == 0 || b->ob_size == 0) &&
948 PyString_CheckExact(a) && PyString_CheckExact(b)) {
949 if (a->ob_size == 0) {
950 Py_INCREF(bb);
951 return bb;
952 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000953 Py_INCREF(a);
954 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000955 }
956 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000957 if (size < 0) {
958 PyErr_SetString(PyExc_OverflowError,
959 "strings are too large to concat");
960 return NULL;
961 }
962
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000963 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000964 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000965 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000966 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000967 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000968 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000969 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000970 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
971 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000972 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000973 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974#undef b
975}
976
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000978string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000979{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000980 register Py_ssize_t i;
981 register Py_ssize_t j;
982 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000984 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985 if (n < 0)
986 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000987 /* watch out for overflows: the size can overflow int,
988 * and the # of bytes needed can overflow size_t
989 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000991 if (n && size / n != a->ob_size) {
992 PyErr_SetString(PyExc_OverflowError,
993 "repeated string is too long");
994 return NULL;
995 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000996 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 Py_INCREF(a);
998 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999 }
Tim Peterse7c05322004-06-27 17:24:49 +00001000 nbytes = (size_t)size;
1001 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001002 PyErr_SetString(PyExc_OverflowError,
1003 "repeated string is too long");
1004 return NULL;
1005 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001007 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001008 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001009 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001010 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001011 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001012 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001013 op->ob_sval[size] = '\0';
1014 if (a->ob_size == 1 && n > 0) {
1015 memset(op->ob_sval, a->ob_sval[0] , n);
1016 return (PyObject *) op;
1017 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001018 i = 0;
1019 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001020 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1021 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001022 }
1023 while (i < size) {
1024 j = (i <= size-i) ? i : size-i;
1025 memcpy(op->ob_sval+i, op->ob_sval, j);
1026 i += j;
1027 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001028 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001029}
1030
1031/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1032
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001033static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001034string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001035 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001036 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001037{
1038 if (i < 0)
1039 i = 0;
1040 if (j < 0)
1041 j = 0; /* Avoid signed/unsigned bug in next line */
1042 if (j > a->ob_size)
1043 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001044 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1045 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 Py_INCREF(a);
1047 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048 }
1049 if (j < i)
1050 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001051 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001052}
1053
Guido van Rossum9284a572000-03-07 15:53:43 +00001054static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001055string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001056{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001057 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001058#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001059 if (PyUnicode_Check(sub_obj))
1060 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001061#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001062 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001063 PyErr_SetString(PyExc_TypeError,
1064 "'in <string>' requires string as left operand");
1065 return -1;
1066 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001067 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001068
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001069 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001070}
1071
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001072static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001074{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001075 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001076 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001077 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001078 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001079 return NULL;
1080 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 pchar = a->ob_sval[i];
1082 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001083 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001084 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001085 else {
1086#ifdef COUNT_ALLOCS
1087 one_strings++;
1088#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001089 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001090 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001091 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001092}
1093
Martin v. Löwiscd353062001-05-24 16:56:35 +00001094static PyObject*
1095string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001096{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001097 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001098 Py_ssize_t len_a, len_b;
1099 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001100 PyObject *result;
1101
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001102 /* Make sure both arguments are strings. */
1103 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001104 result = Py_NotImplemented;
1105 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001106 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001107 if (a == b) {
1108 switch (op) {
1109 case Py_EQ:case Py_LE:case Py_GE:
1110 result = Py_True;
1111 goto out;
1112 case Py_NE:case Py_LT:case Py_GT:
1113 result = Py_False;
1114 goto out;
1115 }
1116 }
1117 if (op == Py_EQ) {
1118 /* Supporting Py_NE here as well does not save
1119 much time, since Py_NE is rarely used. */
1120 if (a->ob_size == b->ob_size
1121 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001122 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 a->ob_size) == 0)) {
1124 result = Py_True;
1125 } else {
1126 result = Py_False;
1127 }
1128 goto out;
1129 }
1130 len_a = a->ob_size; len_b = b->ob_size;
1131 min_len = (len_a < len_b) ? len_a : len_b;
1132 if (min_len > 0) {
1133 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1134 if (c==0)
1135 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1136 }else
1137 c = 0;
1138 if (c == 0)
1139 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1140 switch (op) {
1141 case Py_LT: c = c < 0; break;
1142 case Py_LE: c = c <= 0; break;
1143 case Py_EQ: assert(0); break; /* unreachable */
1144 case Py_NE: c = c != 0; break;
1145 case Py_GT: c = c > 0; break;
1146 case Py_GE: c = c >= 0; break;
1147 default:
1148 result = Py_NotImplemented;
1149 goto out;
1150 }
1151 result = c ? Py_True : Py_False;
1152 out:
1153 Py_INCREF(result);
1154 return result;
1155}
1156
1157int
1158_PyString_Eq(PyObject *o1, PyObject *o2)
1159{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001160 PyStringObject *a = (PyStringObject*) o1;
1161 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001162 return a->ob_size == b->ob_size
1163 && *a->ob_sval == *b->ob_sval
1164 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001165}
1166
Guido van Rossum9bfef441993-03-29 10:43:31 +00001167static long
Fred Drakeba096332000-07-09 07:04:36 +00001168string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001169{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001170 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001171 register unsigned char *p;
1172 register long x;
1173
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001174 if (a->ob_shash != -1)
1175 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001176 len = a->ob_size;
1177 p = (unsigned char *) a->ob_sval;
1178 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001179 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001180 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001181 x ^= a->ob_size;
1182 if (x == -1)
1183 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001184 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001185 return x;
1186}
1187
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001188#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1189
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001190static PyObject*
1191string_subscript(PyStringObject* self, PyObject* item)
1192{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001193 PyNumberMethods *nb = item->ob_type->tp_as_number;
1194 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1195 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001196 if (i == -1 && PyErr_Occurred())
1197 return NULL;
1198 if (i < 0)
1199 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001200 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001201 }
1202 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001203 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204 char* source_buf;
1205 char* result_buf;
1206 PyObject* result;
1207
Tim Petersae1d0c92006-03-17 03:29:34 +00001208 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 PyString_GET_SIZE(self),
1210 &start, &stop, &step, &slicelength) < 0) {
1211 return NULL;
1212 }
1213
1214 if (slicelength <= 0) {
1215 return PyString_FromStringAndSize("", 0);
1216 }
1217 else {
1218 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001219 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001220 if (result_buf == NULL)
1221 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001222
Tim Petersae1d0c92006-03-17 03:29:34 +00001223 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001224 cur += step, i++) {
1225 result_buf[i] = source_buf[cur];
1226 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001227
1228 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001229 slicelength);
1230 PyMem_Free(result_buf);
1231 return result;
1232 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001233 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001234 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001235 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001236 "string indices must be integers");
1237 return NULL;
1238 }
1239}
1240
Martin v. Löwis18e16552006-02-15 17:27:45 +00001241static Py_ssize_t
1242string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001243{
1244 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001245 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001246 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001247 return -1;
1248 }
1249 *ptr = (void *)self->ob_sval;
1250 return self->ob_size;
1251}
1252
Martin v. Löwis18e16552006-02-15 17:27:45 +00001253static Py_ssize_t
1254string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001255{
Guido van Rossum045e6881997-09-08 18:30:11 +00001256 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001257 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001258 return -1;
1259}
1260
Martin v. Löwis18e16552006-02-15 17:27:45 +00001261static Py_ssize_t
1262string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263{
1264 if ( lenp )
1265 *lenp = self->ob_size;
1266 return 1;
1267}
1268
Martin v. Löwis18e16552006-02-15 17:27:45 +00001269static Py_ssize_t
1270string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001271{
1272 if ( index != 0 ) {
1273 PyErr_SetString(PyExc_SystemError,
1274 "accessing non-existent string segment");
1275 return -1;
1276 }
1277 *ptr = self->ob_sval;
1278 return self->ob_size;
1279}
1280
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001281static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001282 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001283 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001284 (ssizeargfunc)string_repeat, /*sq_repeat*/
1285 (ssizeargfunc)string_item, /*sq_item*/
1286 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001287 0, /*sq_ass_item*/
1288 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001289 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001290};
1291
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001292static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001294 (binaryfunc)string_subscript,
1295 0,
1296};
1297
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001298static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001299 (readbufferproc)string_buffer_getreadbuf,
1300 (writebufferproc)string_buffer_getwritebuf,
1301 (segcountproc)string_buffer_getsegcount,
1302 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001303};
1304
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305
1306
/* Strip modes; each value is also an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* Method name for strip mode i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001315
Andrew Dalke525eab32006-05-26 14:00:45 +00001316
/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are
   compared directly and only the bytes in between go through memcmp().

   Don't call if length < 2: the memcmp() count is length-2.

   Every argument is parenthesized so that expression arguments (for
   example `buf + 1` as target) expand correctly; arguments are still
   evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)		\
  ((target)[(offset)] == (pattern)[0] &&				\
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&		\
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1322
1323
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits, capped at
   MAX_PREALLOC: 5 splits gives 6 elements.  The argument is parenthesized
   so that an expression argument is not split apart by operator
   precedence. */
#define PREALLOC_SIZE(maxsplit) \
	((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1336
/* Create the substring data[left:right] and append it to `list`.  The
   expanding function must provide the variables `str` and `list` and an
   `onError` label, which is jumped to on any failure.  The reference
   created here is released once the append has been attempted.
   NOTE: this expands to several statements without enclosing braces. */
#define SPLIT_APPEND(data, left, right)				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (PyList_Append(list, str)) {				\
		Py_DECREF(str);					\
		goto onError;					\
	}							\
	Py_DECREF(str);

/* Create the substring data[left:right] and store it as element `count`
   of `list`, then increment `count`.  While `count` is below MAX_PREALLOC
   the item is stored directly with PyList_SET_ITEM; after that it is
   appended and the reference created here is released.  The expanding
   function must provide `str`, `list`, `count` and an `onError` label. */
#define SPLIT_ADD(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (count >= MAX_PREALLOC) {				\
		int append_failed = PyList_Append(list, str);	\
		Py_DECREF(str);					\
		if (append_failed)				\
			goto onError;				\
	}							\
	else {							\
		PyList_SET_ITEM(list, count, str);		\
	}							\
	count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001365
/* Always force the list to the expected size: ob_size is overwritten with
   the caller's `count` variable. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count;

/* Scanning helpers that modify the index variable `i` in place.  The
   forward forms stop at `len`; the reverse forms stop once `i` drops
   below 0. */
#define SKIP_SPACE(s, i, len)    { for (; i<len && isspace(Py_CHARMASK(s[i])); i++) ; }
#define SKIP_NONSPACE(s, i, len) { for (; i<len && !isspace(Py_CHARMASK(s[i])); i++) ; }
#define RSKIP_SPACE(s, i)        { for (; i>=0 && isspace(Py_CHARMASK(s[i])); i--) ; }
#define RSKIP_NONSPACE(s, i)     { for (; i>=0 && !isspace(Py_CHARMASK(s[i])); i--) ; }
1373
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001374Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001375split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376{
Andrew Dalke525eab32006-05-26 14:00:45 +00001377 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001378 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001379 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001380
1381 if (list == NULL)
1382 return NULL;
1383
Andrew Dalke02758d62006-05-26 15:21:01 +00001384 i = j = 0;
1385
1386 while (maxsplit-- > 0) {
1387 SKIP_SPACE(s, i, len);
1388 if (i==len) break;
1389 j = i; i++;
1390 SKIP_NONSPACE(s, i, len);
1391 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001392 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001393
1394 if (i < len) {
1395 /* Only occurs when maxsplit was reached */
1396 /* Skip any remaining whitespace and copy to end of string */
1397 SKIP_SPACE(s, i, len);
1398 if (i != len)
1399 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001400 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001401 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001403 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404 Py_DECREF(list);
1405 return NULL;
1406}
1407
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001408Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001409split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001410{
Andrew Dalke525eab32006-05-26 14:00:45 +00001411 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001412 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001413 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001414
1415 if (list == NULL)
1416 return NULL;
1417
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001418 i = j = 0;
1419 while ((j < len) && (maxcount-- > 0)) {
1420 for(; j<len; j++) {
1421 /* I found that using memchr makes no difference */
1422 if (s[j] == ch) {
1423 SPLIT_ADD(s, i, j);
1424 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001425 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001426 }
1427 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001428 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001429 if (i <= len) {
1430 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001431 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001432 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001433 return list;
1434
1435 onError:
1436 Py_DECREF(list);
1437 return NULL;
1438}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001440PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441"S.split([sep [,maxsplit]]) -> list of strings\n\
1442\n\
1443Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001444delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001445splits are done. If sep is not specified or is None, any\n\
1446whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447
1448static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001449string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001451 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001452 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001453 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001454 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001455#ifdef USE_FAST
1456 Py_ssize_t pos;
1457#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458
Martin v. Löwis9c830762006-04-13 08:37:17 +00001459 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001461 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001462 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001463 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001464 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001465 if (PyString_Check(subobj)) {
1466 sub = PyString_AS_STRING(subobj);
1467 n = PyString_GET_SIZE(subobj);
1468 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001469#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001470 else if (PyUnicode_Check(subobj))
1471 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001472#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001473 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1474 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001475
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001476 if (n == 0) {
1477 PyErr_SetString(PyExc_ValueError, "empty separator");
1478 return NULL;
1479 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001480 else if (n == 1)
1481 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001482
Andrew Dalke525eab32006-05-26 14:00:45 +00001483 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 if (list == NULL)
1485 return NULL;
1486
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001487#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001489 while (maxsplit-- > 0) {
1490 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1491 if (pos < 0)
1492 break;
1493 j = i+pos;
1494 SPLIT_ADD(s, i, j);
1495 i = j + n;
1496
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001497 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001498#else
1499 i = j = 0;
1500 while ((j+n <= len) && (maxsplit-- > 0)) {
1501 for (; j+n <= len; j++) {
1502 if (Py_STRING_MATCH(s, j, sub, n)) {
1503 SPLIT_ADD(s, i, j);
1504 i = j = j + n;
1505 break;
1506 }
1507 }
1508 }
1509#endif
1510 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001511 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001512 return list;
1513
Andrew Dalke525eab32006-05-26 14:00:45 +00001514 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515 Py_DECREF(list);
1516 return NULL;
1517}
1518
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001519PyDoc_STRVAR(partition__doc__,
1520"S.partition(sep) -> (head, sep, tail)\n\
1521\n\
1522Searches for the separator sep in S, and returns the part before it,\n\
1523the separator itself, and the part after it. If the separator is not\n\
1524found, returns S and two empty strings.");
1525
1526static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001527string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001528{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001529 const char *sep;
1530 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001531
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001532 if (PyString_Check(sep_obj)) {
1533 sep = PyString_AS_STRING(sep_obj);
1534 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001535 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001536#ifdef Py_USING_UNICODE
1537 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001538 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001539#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001540 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001541 return NULL;
1542
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001543 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001544 (PyObject*) self,
1545 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1546 sep_obj, sep, sep_len
1547 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001548}
1549
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001550PyDoc_STRVAR(rpartition__doc__,
1551"S.rpartition(sep) -> (head, sep, tail)\n\
1552\n\
1553Searches for the separator sep in S, starting at the end of S, and returns\n\
1554the part before it, the separator itself, and the part after it. If the\n\
1555separator is not found, returns S and two empty strings.");
1556
1557static PyObject *
1558string_rpartition(PyStringObject *self, PyObject *sep_obj)
1559{
1560 const char *sep;
1561 Py_ssize_t sep_len;
1562
1563 if (PyString_Check(sep_obj)) {
1564 sep = PyString_AS_STRING(sep_obj);
1565 sep_len = PyString_GET_SIZE(sep_obj);
1566 }
1567#ifdef Py_USING_UNICODE
1568 else if (PyUnicode_Check(sep_obj))
1569 return PyUnicode_Partition((PyObject *) self, sep_obj);
1570#endif
1571 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1572 return NULL;
1573
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001574 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001575 (PyObject*) self,
1576 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1577 sep_obj, sep, sep_len
1578 );
1579}
1580
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001581Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001582rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001583{
Andrew Dalke525eab32006-05-26 14:00:45 +00001584 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001585 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001586 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001587
1588 if (list == NULL)
1589 return NULL;
1590
Andrew Dalke02758d62006-05-26 15:21:01 +00001591 i = j = len-1;
1592
1593 while (maxsplit-- > 0) {
1594 RSKIP_SPACE(s, i);
1595 if (i<0) break;
1596 j = i; i--;
1597 RSKIP_NONSPACE(s, i);
1598 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001599 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001600 if (i >= 0) {
1601 /* Only occurs when maxsplit was reached */
1602 /* Skip any remaining whitespace and copy to beginning of string */
1603 RSKIP_SPACE(s, i);
1604 if (i >= 0)
1605 SPLIT_ADD(s, 0, i + 1);
1606
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001607 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001608 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001609 if (PyList_Reverse(list) < 0)
1610 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001611 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001612 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001613 Py_DECREF(list);
1614 return NULL;
1615}
1616
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001617Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001618rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001619{
Andrew Dalke525eab32006-05-26 14:00:45 +00001620 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001621 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001622 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001623
1624 if (list == NULL)
1625 return NULL;
1626
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001627 i = j = len - 1;
1628 while ((i >= 0) && (maxcount-- > 0)) {
1629 for (; i >= 0; i--) {
1630 if (s[i] == ch) {
1631 SPLIT_ADD(s, i + 1, j + 1);
1632 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001634 }
1635 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001636 }
1637 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001638 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001639 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001640 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001641 if (PyList_Reverse(list) < 0)
1642 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001643 return list;
1644
1645 onError:
1646 Py_DECREF(list);
1647 return NULL;
1648}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001649
1650PyDoc_STRVAR(rsplit__doc__,
1651"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1652\n\
1653Return a list of the words in the string S, using sep as the\n\
1654delimiter string, starting at the end of the string and working\n\
1655to the front. If maxsplit is given, at most maxsplit splits are\n\
1656done. If sep is not specified or is None, any whitespace string\n\
1657is a separator.");
1658
1659static PyObject *
1660string_rsplit(PyStringObject *self, PyObject *args)
1661{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001662 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001663 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001664 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001665 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001666
Martin v. Löwis9c830762006-04-13 08:37:17 +00001667 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001668 return NULL;
1669 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001670 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001671 if (subobj == Py_None)
1672 return rsplit_whitespace(s, len, maxsplit);
1673 if (PyString_Check(subobj)) {
1674 sub = PyString_AS_STRING(subobj);
1675 n = PyString_GET_SIZE(subobj);
1676 }
1677#ifdef Py_USING_UNICODE
1678 else if (PyUnicode_Check(subobj))
1679 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1680#endif
1681 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1682 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001683
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001684 if (n == 0) {
1685 PyErr_SetString(PyExc_ValueError, "empty separator");
1686 return NULL;
1687 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001688 else if (n == 1)
1689 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001690
Andrew Dalke525eab32006-05-26 14:00:45 +00001691 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001692 if (list == NULL)
1693 return NULL;
1694
1695 j = len;
1696 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001697
1698 while ( (i >= 0) && (maxsplit-- > 0) ) {
1699 for (; i>=0; i--) {
1700 if (Py_STRING_MATCH(s, i, sub, n)) {
1701 SPLIT_ADD(s, i + n, j);
1702 j = i;
1703 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001704 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001705 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001706 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001707 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001708 SPLIT_ADD(s, 0, j);
1709 FIX_PREALLOC_SIZE(list);
1710 if (PyList_Reverse(list) < 0)
1711 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001712 return list;
1713
Andrew Dalke525eab32006-05-26 14:00:45 +00001714onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001715 Py_DECREF(list);
1716 return NULL;
1717}
1718
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001720PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721"S.join(sequence) -> string\n\
1722\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001723Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001724sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725
1726static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001727string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728{
1729 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001730 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001732 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001733 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001734 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001735 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001736 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737
Tim Peters19fe14e2001-01-19 03:03:47 +00001738 seq = PySequence_Fast(orig, "");
1739 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001740 return NULL;
1741 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001742
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001743 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001744 if (seqlen == 0) {
1745 Py_DECREF(seq);
1746 return PyString_FromString("");
1747 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001749 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001750 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1751 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001752 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001753 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001754 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001756
Raymond Hettinger674f2412004-08-23 23:23:54 +00001757 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001758 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001759 * Do a pre-pass to figure out the total amount of space we'll
1760 * need (sz), see whether any argument is absurd, and defer to
1761 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001763 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001764 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001765 item = PySequence_Fast_GET_ITEM(seq, i);
1766 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001767#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001768 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001769 /* Defer to Unicode join.
1770 * CAUTION: There's no gurantee that the
1771 * original sequence can be iterated over
1772 * again, so we must pass seq here.
1773 */
1774 PyObject *result;
1775 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001776 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001777 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001779#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001780 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001781 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001782 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001783 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001784 Py_DECREF(seq);
1785 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001786 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001787 sz += PyString_GET_SIZE(item);
1788 if (i != 0)
1789 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001790 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001791 PyErr_SetString(PyExc_OverflowError,
1792 "join() is too long for a Python string");
1793 Py_DECREF(seq);
1794 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001796 }
1797
1798 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001799 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001800 if (res == NULL) {
1801 Py_DECREF(seq);
1802 return NULL;
1803 }
1804
1805 /* Catenate everything. */
1806 p = PyString_AS_STRING(res);
1807 for (i = 0; i < seqlen; ++i) {
1808 size_t n;
1809 item = PySequence_Fast_GET_ITEM(seq, i);
1810 n = PyString_GET_SIZE(item);
1811 memcpy(p, PyString_AS_STRING(item), n);
1812 p += n;
1813 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001814 memcpy(p, sep, seplen);
1815 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001816 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001818
Jeremy Hylton49048292000-07-11 03:28:17 +00001819 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821}
1822
Tim Peters52e155e2001-06-16 05:42:57 +00001823PyObject *
1824_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001825{
Tim Petersa7259592001-06-16 05:11:17 +00001826 assert(sep != NULL && PyString_Check(sep));
1827 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001828 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001829}
1830
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001831Py_LOCAL(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001832string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001833{
1834 if (*end > len)
1835 *end = len;
1836 else if (*end < 0)
1837 *end += len;
1838 if (*end < 0)
1839 *end = 0;
1840 if (*start < 0)
1841 *start += len;
1842 if (*start < 0)
1843 *start = 0;
1844}
1845
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001846Py_LOCAL(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001847string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001849 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001850 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001851 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001852 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853
Martin v. Löwis18e16552006-02-15 17:27:45 +00001854 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001855 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001856 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001857 return -2;
1858 if (PyString_Check(subobj)) {
1859 sub = PyString_AS_STRING(subobj);
1860 n = PyString_GET_SIZE(subobj);
1861 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001862#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001863 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001864 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001865#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867 return -2;
1868
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001869 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001870
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001871 if (dir > 0)
1872 return stringlib_find(s+i, last-i, sub, n, i);
1873 else
1874 return stringlib_rfind(s+i, last-i, sub, n, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875}
1876
1877
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001878PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879"S.find(sub [,start [,end]]) -> int\n\
1880\n\
1881Return the lowest index in S where substring sub is found,\n\
1882such that sub is contained within s[start,end]. Optional\n\
1883arguments start and end are interpreted as in slice notation.\n\
1884\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001885Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886
1887static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001888string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001890 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891 if (result == -2)
1892 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001893 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894}
1895
1896
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001897PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898"S.index(sub [,start [,end]]) -> int\n\
1899\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001900Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901
1902static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001903string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001905 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906 if (result == -2)
1907 return NULL;
1908 if (result == -1) {
1909 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001910 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911 return NULL;
1912 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001913 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914}
1915
1916
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001917PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918"S.rfind(sub [,start [,end]]) -> int\n\
1919\n\
1920Return the highest index in S where substring sub is found,\n\
1921such that sub is contained within s[start,end]. Optional\n\
1922arguments start and end are interpreted as in slice notation.\n\
1923\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001924Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925
1926static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001927string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001929 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930 if (result == -2)
1931 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001932 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933}
1934
1935
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001936PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937"S.rindex(sub [,start [,end]]) -> int\n\
1938\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001939Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940
1941static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001942string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001944 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945 if (result == -2)
1946 return NULL;
1947 if (result == -1) {
1948 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001949 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950 return NULL;
1951 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001952 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953}
1954
1955
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001956Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001957do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1958{
1959 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001960 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001961 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001962 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1963 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001964
1965 i = 0;
1966 if (striptype != RIGHTSTRIP) {
1967 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1968 i++;
1969 }
1970 }
1971
1972 j = len;
1973 if (striptype != LEFTSTRIP) {
1974 do {
1975 j--;
1976 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1977 j++;
1978 }
1979
1980 if (i == 0 && j == len && PyString_CheckExact(self)) {
1981 Py_INCREF(self);
1982 return (PyObject*)self;
1983 }
1984 else
1985 return PyString_FromStringAndSize(s+i, j-i);
1986}
1987
1988
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001989Py_LOCAL(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001990do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991{
1992 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001993 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995 i = 0;
1996 if (striptype != RIGHTSTRIP) {
1997 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1998 i++;
1999 }
2000 }
2001
2002 j = len;
2003 if (striptype != LEFTSTRIP) {
2004 do {
2005 j--;
2006 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2007 j++;
2008 }
2009
Tim Peters8fa5dd02001-09-12 02:18:30 +00002010 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011 Py_INCREF(self);
2012 return (PyObject*)self;
2013 }
2014 else
2015 return PyString_FromStringAndSize(s+i, j-i);
2016}
2017
2018
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002019Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002020do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2021{
2022 PyObject *sep = NULL;
2023
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002024 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002025 return NULL;
2026
2027 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002028 if (PyString_Check(sep))
2029 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002030#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002031 else if (PyUnicode_Check(sep)) {
2032 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2033 PyObject *res;
2034 if (uniself==NULL)
2035 return NULL;
2036 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2037 striptype, sep);
2038 Py_DECREF(uniself);
2039 return res;
2040 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002041#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002042 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002043#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002044 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002045#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002046 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002047#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002048 STRIPNAME(striptype));
2049 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002050 }
2051
2052 return do_strip(self, striptype);
2053}
2054
2055
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002056PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002057"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058\n\
2059Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002060whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002061If chars is given and not None, remove characters in chars instead.\n\
2062If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063
2064static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002065string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002067 if (PyTuple_GET_SIZE(args) == 0)
2068 return do_strip(self, BOTHSTRIP); /* Common case */
2069 else
2070 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071}
2072
2073
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002074PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002075"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002077Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002078If chars is given and not None, remove characters in chars instead.\n\
2079If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080
2081static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002082string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002084 if (PyTuple_GET_SIZE(args) == 0)
2085 return do_strip(self, LEFTSTRIP); /* Common case */
2086 else
2087 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088}
2089
2090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002091PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002092"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002094Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002095If chars is given and not None, remove characters in chars instead.\n\
2096If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097
2098static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002099string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002101 if (PyTuple_GET_SIZE(args) == 0)
2102 return do_strip(self, RIGHTSTRIP); /* Common case */
2103 else
2104 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105}
2106
2107
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002108PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109"S.lower() -> string\n\
2110\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002111Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002113/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2114#ifndef _tolower
2115#define _tolower tolower
2116#endif
2117
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002119string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002121 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002122 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002123 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002125 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002126 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002128
2129 s = PyString_AS_STRING(newobj);
2130
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002131 memcpy(s, PyString_AS_STRING(self), n);
2132
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002134 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002135 if (isupper(c))
2136 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002138
Anthony Baxtera6286212006-04-11 07:42:36 +00002139 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140}
2141
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002142PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143"S.upper() -> string\n\
2144\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002145Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002147#ifndef _toupper
2148#define _toupper toupper
2149#endif
2150
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002152string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002154 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002155 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002156 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002158 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002159 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002161
2162 s = PyString_AS_STRING(newobj);
2163
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002164 memcpy(s, PyString_AS_STRING(self), n);
2165
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002167 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002168 if (islower(c))
2169 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002171
Anthony Baxtera6286212006-04-11 07:42:36 +00002172 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173}
2174
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002175PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176"S.title() -> string\n\
2177\n\
2178Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002179characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002180
2181static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002182string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002183{
2184 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002185 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002187 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188
Anthony Baxtera6286212006-04-11 07:42:36 +00002189 newobj = PyString_FromStringAndSize(NULL, n);
2190 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002192 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193 for (i = 0; i < n; i++) {
2194 int c = Py_CHARMASK(*s++);
2195 if (islower(c)) {
2196 if (!previous_is_cased)
2197 c = toupper(c);
2198 previous_is_cased = 1;
2199 } else if (isupper(c)) {
2200 if (previous_is_cased)
2201 c = tolower(c);
2202 previous_is_cased = 1;
2203 } else
2204 previous_is_cased = 0;
2205 *s_new++ = c;
2206 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002207 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002208}
2209
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002210PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211"S.capitalize() -> string\n\
2212\n\
2213Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002214capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002215
2216static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002217string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218{
2219 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002220 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002221 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222
Anthony Baxtera6286212006-04-11 07:42:36 +00002223 newobj = PyString_FromStringAndSize(NULL, n);
2224 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002226 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227 if (0 < n) {
2228 int c = Py_CHARMASK(*s++);
2229 if (islower(c))
2230 *s_new = toupper(c);
2231 else
2232 *s_new = c;
2233 s_new++;
2234 }
2235 for (i = 1; i < n; i++) {
2236 int c = Py_CHARMASK(*s++);
2237 if (isupper(c))
2238 *s_new = tolower(c);
2239 else
2240 *s_new = c;
2241 s_new++;
2242 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002243 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244}
2245
2246
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002247PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248"S.count(sub[, start[, end]]) -> int\n\
2249\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002250Return the number of non-overlapping occurrences of substring sub in\n\
2251string S[start:end]. Optional arguments start and end are interpreted\n\
2252as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253
2254static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002255string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002257 PyObject *sub_obj;
2258 const char *str = PyString_AS_STRING(self), *sub;
2259 Py_ssize_t sub_len;
2260 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002262 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2263 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002265
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002266 if (PyString_Check(sub_obj)) {
2267 sub = PyString_AS_STRING(sub_obj);
2268 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002270#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002271 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002272 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002273 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002274 if (count == -1)
2275 return NULL;
2276 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002277 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002278 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002279#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002280 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 return NULL;
2282
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002283 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002284
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002285 return PyInt_FromSsize_t(
2286 stringlib_count(str + start, end - start, sub, sub_len)
2287 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002288}
2289
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002290PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291"S.swapcase() -> string\n\
2292\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002294converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002295
2296static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002297string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298{
2299 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002300 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002301 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302
Anthony Baxtera6286212006-04-11 07:42:36 +00002303 newobj = PyString_FromStringAndSize(NULL, n);
2304 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002306 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307 for (i = 0; i < n; i++) {
2308 int c = Py_CHARMASK(*s++);
2309 if (islower(c)) {
2310 *s_new = toupper(c);
2311 }
2312 else if (isupper(c)) {
2313 *s_new = tolower(c);
2314 }
2315 else
2316 *s_new = c;
2317 s_new++;
2318 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002319 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320}
2321
2322
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002323PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324"S.translate(table [,deletechars]) -> string\n\
2325\n\
2326Return a copy of the string S, where all characters occurring\n\
2327in the optional argument deletechars are removed, and the\n\
2328remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002329translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330
2331static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002332string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002334 register char *input, *output;
2335 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002336 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002338 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002339 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 PyObject *result;
2341 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002342 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002344 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002345 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002347
2348 if (PyString_Check(tableobj)) {
2349 table1 = PyString_AS_STRING(tableobj);
2350 tablen = PyString_GET_SIZE(tableobj);
2351 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002352#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002353 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002354 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002355 parameter; instead a mapping to None will cause characters
2356 to be deleted. */
2357 if (delobj != NULL) {
2358 PyErr_SetString(PyExc_TypeError,
2359 "deletions are implemented differently for unicode");
2360 return NULL;
2361 }
2362 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2363 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002364#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367
Martin v. Löwis00b61272002-12-12 20:03:19 +00002368 if (tablen != 256) {
2369 PyErr_SetString(PyExc_ValueError,
2370 "translation table must be 256 characters long");
2371 return NULL;
2372 }
2373
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 if (delobj != NULL) {
2375 if (PyString_Check(delobj)) {
2376 del_table = PyString_AS_STRING(delobj);
2377 dellen = PyString_GET_SIZE(delobj);
2378 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002379#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380 else if (PyUnicode_Check(delobj)) {
2381 PyErr_SetString(PyExc_TypeError,
2382 "deletions are implemented differently for unicode");
2383 return NULL;
2384 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002385#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002386 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2387 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 }
2389 else {
2390 del_table = NULL;
2391 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392 }
2393
2394 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002395 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002396 result = PyString_FromStringAndSize((char *)NULL, inlen);
2397 if (result == NULL)
2398 return NULL;
2399 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002400 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002401
2402 if (dellen == 0) {
2403 /* If no deletions are required, use faster code */
2404 for (i = inlen; --i >= 0; ) {
2405 c = Py_CHARMASK(*input++);
2406 if (Py_CHARMASK((*output++ = table[c])) != c)
2407 changed = 1;
2408 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002409 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410 return result;
2411 Py_DECREF(result);
2412 Py_INCREF(input_obj);
2413 return input_obj;
2414 }
2415
2416 for (i = 0; i < 256; i++)
2417 trans_table[i] = Py_CHARMASK(table[i]);
2418
2419 for (i = 0; i < dellen; i++)
2420 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2421
2422 for (i = inlen; --i >= 0; ) {
2423 c = Py_CHARMASK(*input++);
2424 if (trans_table[c] != -1)
2425 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2426 continue;
2427 changed = 1;
2428 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002429 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430 Py_DECREF(result);
2431 Py_INCREF(input_obj);
2432 return input_obj;
2433 }
2434 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002435 if (inlen > 0)
2436 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437 return result;
2438}
2439
2440
/* Search directions understood by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a char * to the first byte equal to `c' within the first
   `target_len' bytes of `target', or NULL if there is none.
   Thin wrapper around memchr(); every argument is parenthesised so that
   arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2448
2449/* String ops must return a string. */
2450/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002451Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002452return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002453{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002454 if (PyString_CheckExact(self)) {
2455 Py_INCREF(self);
2456 return self;
2457 }
2458 return (PyStringObject *)PyString_FromStringAndSize(
2459 PyString_AS_STRING(self),
2460 PyString_GET_SIZE(self));
2461}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002462
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002463Py_LOCAL(Py_ssize_t)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002464countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002465{
2466 Py_ssize_t count=0;
2467 char *start=target;
2468 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002470 while ( (start=findchar(start, end-start, c)) != NULL ) {
2471 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002472 if (count >= maxcount)
2473 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002474 start += 1;
2475 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002476 return count;
2477}
2478
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002479Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002480findstring(char *target, Py_ssize_t target_len,
2481 char *pattern, Py_ssize_t pattern_len,
2482 Py_ssize_t start,
2483 Py_ssize_t end,
2484 int direction)
2485{
2486 if (start < 0) {
2487 start += target_len;
2488 if (start < 0)
2489 start = 0;
2490 }
2491 if (end > target_len) {
2492 end = target_len;
2493 } else if (end < 0) {
2494 end += target_len;
2495 if (end < 0)
2496 end = 0;
2497 }
2498
2499 /* zero-length substrings always match at the first attempt */
2500 if (pattern_len == 0)
2501 return (direction > 0) ? start : end;
2502
2503 end -= pattern_len;
2504
2505 if (direction < 0) {
2506 for (; end >= start; end--)
2507 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2508 return end;
2509 } else {
2510 for (; start <= end; start++)
2511 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2512 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002513 }
2514 return -1;
2515}
2516
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002517Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002518countstring(char *target, Py_ssize_t target_len,
2519 char *pattern, Py_ssize_t pattern_len,
2520 Py_ssize_t start,
2521 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002522 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002523{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002524 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002525
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002526 if (start < 0) {
2527 start += target_len;
2528 if (start < 0)
2529 start = 0;
2530 }
2531 if (end > target_len) {
2532 end = target_len;
2533 } else if (end < 0) {
2534 end += target_len;
2535 if (end < 0)
2536 end = 0;
2537 }
2538
2539 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002540 if (pattern_len == 0 || maxcount == 0) {
2541 if (target_len+1 < maxcount)
2542 return target_len+1;
2543 return maxcount;
2544 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002545
2546 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002547 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002548 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002549 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2550 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002551 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002552 end -= pattern_len-1;
2553 }
2554 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002555 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002556 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2557 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002558 if (--maxcount <= 0)
2559 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002560 start += pattern_len-1;
2561 }
2562 }
2563 return count;
2564}
2565
2566
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002567/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002568
2569/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002570Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002571replace_interleave(PyStringObject *self,
2572 PyStringObject *to,
2573 Py_ssize_t maxcount)
2574{
2575 char *self_s, *to_s, *result_s;
2576 Py_ssize_t self_len, to_len, result_len;
2577 Py_ssize_t count, i, product;
2578 PyStringObject *result;
2579
2580 self_len = PyString_GET_SIZE(self);
2581 to_len = PyString_GET_SIZE(to);
2582
2583 /* 1 at the end plus 1 after every character */
2584 count = self_len+1;
2585 if (maxcount < count)
2586 count = maxcount;
2587
2588 /* Check for overflow */
2589 /* result_len = count * to_len + self_len; */
2590 product = count * to_len;
2591 if (product / to_len != count) {
2592 PyErr_SetString(PyExc_OverflowError,
2593 "replace string is too long");
2594 return NULL;
2595 }
2596 result_len = product + self_len;
2597 if (result_len < 0) {
2598 PyErr_SetString(PyExc_OverflowError,
2599 "replace string is too long");
2600 return NULL;
2601 }
2602
2603 if (! (result = (PyStringObject *)
2604 PyString_FromStringAndSize(NULL, result_len)) )
2605 return NULL;
2606
2607 self_s = PyString_AS_STRING(self);
2608 to_s = PyString_AS_STRING(to);
2609 to_len = PyString_GET_SIZE(to);
2610 result_s = PyString_AS_STRING(result);
2611
2612 /* TODO: special case single character, which doesn't need memcpy */
2613
2614 /* Lay the first one down (guaranteed this will occur) */
2615 memcpy(result_s, to_s, to_len);
2616 result_s += to_len;
2617 count -= 1;
2618
2619 for (i=0; i<count; i++) {
2620 *result_s++ = *self_s++;
2621 memcpy(result_s, to_s, to_len);
2622 result_s += to_len;
2623 }
2624
2625 /* Copy the rest of the original string */
2626 memcpy(result_s, self_s, self_len-i);
2627
2628 return result;
2629}
2630
2631/* Special case for deleting a single character */
2632/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002633Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002634replace_delete_single_character(PyStringObject *self,
2635 char from_c, Py_ssize_t maxcount)
2636{
2637 char *self_s, *result_s;
2638 char *start, *next, *end;
2639 Py_ssize_t self_len, result_len;
2640 Py_ssize_t count;
2641 PyStringObject *result;
2642
2643 self_len = PyString_GET_SIZE(self);
2644 self_s = PyString_AS_STRING(self);
2645
Andrew Dalke51324072006-05-26 20:25:22 +00002646 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002647 if (count == 0) {
2648 return return_self(self);
2649 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002650
2651 result_len = self_len - count; /* from_len == 1 */
2652 assert(result_len>=0);
2653
2654 if ( (result = (PyStringObject *)
2655 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2656 return NULL;
2657 result_s = PyString_AS_STRING(result);
2658
2659 start = self_s;
2660 end = self_s + self_len;
2661 while (count-- > 0) {
2662 next = findchar(start, end-start, from_c);
2663 if (next == NULL)
2664 break;
2665 memcpy(result_s, start, next-start);
2666 result_s += (next-start);
2667 start = next+1;
2668 }
2669 memcpy(result_s, start, end-start);
2670
2671 return result;
2672}
2673
2674/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2675
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002676Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002677replace_delete_substring(PyStringObject *self, PyStringObject *from,
2678 Py_ssize_t maxcount) {
2679 char *self_s, *from_s, *result_s;
2680 char *start, *next, *end;
2681 Py_ssize_t self_len, from_len, result_len;
2682 Py_ssize_t count, offset;
2683 PyStringObject *result;
2684
2685 self_len = PyString_GET_SIZE(self);
2686 self_s = PyString_AS_STRING(self);
2687 from_len = PyString_GET_SIZE(from);
2688 from_s = PyString_AS_STRING(from);
2689
2690 count = countstring(self_s, self_len,
2691 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002692 0, self_len, 1,
2693 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002694
2695 if (count == 0) {
2696 /* no matches */
2697 return return_self(self);
2698 }
2699
2700 result_len = self_len - (count * from_len);
2701 assert (result_len>=0);
2702
2703 if ( (result = (PyStringObject *)
2704 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2705 return NULL;
2706
2707 result_s = PyString_AS_STRING(result);
2708
2709 start = self_s;
2710 end = self_s + self_len;
2711 while (count-- > 0) {
2712 offset = findstring(start, end-start,
2713 from_s, from_len,
2714 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002715 if (offset == -1)
2716 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002717 next = start + offset;
2718
2719 memcpy(result_s, start, next-start);
2720
2721 result_s += (next-start);
2722 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002723 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002724 memcpy(result_s, start, end-start);
2725 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002726}
2727
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002728/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002729Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002730replace_single_character_in_place(PyStringObject *self,
2731 char from_c, char to_c,
2732 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002733{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002734 char *self_s, *result_s, *start, *end, *next;
2735 Py_ssize_t self_len;
2736 PyStringObject *result;
2737
2738 /* The result string will be the same size */
2739 self_s = PyString_AS_STRING(self);
2740 self_len = PyString_GET_SIZE(self);
2741
2742 next = findchar(self_s, self_len, from_c);
2743
2744 if (next == NULL) {
2745 /* No matches; return the original string */
2746 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002747 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002748
2749 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002750 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002751 if (result == NULL)
2752 return NULL;
2753 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002754 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002755
2756 /* change everything in-place, starting with this one */
2757 start = result_s + (next-self_s);
2758 *start = to_c;
2759 start++;
2760 end = result_s + self_len;
2761
2762 while (--maxcount > 0) {
2763 next = findchar(start, end-start, from_c);
2764 if (next == NULL)
2765 break;
2766 *next = to_c;
2767 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002768 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002769
2770 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002771}
2772
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002773/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002774Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002775replace_substring_in_place(PyStringObject *self,
2776 PyStringObject *from,
2777 PyStringObject *to,
2778 Py_ssize_t maxcount)
2779{
2780 char *result_s, *start, *end;
2781 char *self_s, *from_s, *to_s;
2782 Py_ssize_t self_len, from_len, offset;
2783 PyStringObject *result;
2784
2785 /* The result string will be the same size */
2786
2787 self_s = PyString_AS_STRING(self);
2788 self_len = PyString_GET_SIZE(self);
2789
2790 from_s = PyString_AS_STRING(from);
2791 from_len = PyString_GET_SIZE(from);
2792 to_s = PyString_AS_STRING(to);
2793
2794 offset = findstring(self_s, self_len,
2795 from_s, from_len,
2796 0, self_len, FORWARD);
2797
2798 if (offset == -1) {
2799 /* No matches; return the original string */
2800 return return_self(self);
2801 }
2802
2803 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002804 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002805 if (result == NULL)
2806 return NULL;
2807 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002808 memcpy(result_s, self_s, self_len);
2809
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810
2811 /* change everything in-place, starting with this one */
2812 start = result_s + offset;
2813 memcpy(start, to_s, from_len);
2814 start += from_len;
2815 end = result_s + self_len;
2816
2817 while ( --maxcount > 0) {
2818 offset = findstring(start, end-start,
2819 from_s, from_len,
2820 0, end-start, FORWARD);
2821 if (offset==-1)
2822 break;
2823 memcpy(start+offset, to_s, from_len);
2824 start += offset+from_len;
2825 }
2826
2827 return result;
2828}
2829
2830/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002831Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002832replace_single_character(PyStringObject *self,
2833 char from_c,
2834 PyStringObject *to,
2835 Py_ssize_t maxcount)
2836{
2837 char *self_s, *to_s, *result_s;
2838 char *start, *next, *end;
2839 Py_ssize_t self_len, to_len, result_len;
2840 Py_ssize_t count, product;
2841 PyStringObject *result;
2842
2843 self_s = PyString_AS_STRING(self);
2844 self_len = PyString_GET_SIZE(self);
2845
Andrew Dalke51324072006-05-26 20:25:22 +00002846 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002847
2848 if (count == 0) {
2849 /* no matches, return unchanged */
2850 return return_self(self);
2851 }
2852
2853 to_s = PyString_AS_STRING(to);
2854 to_len = PyString_GET_SIZE(to);
2855
2856 /* use the difference between current and new, hence the "-1" */
2857 /* result_len = self_len + count * (to_len-1) */
2858 product = count * (to_len-1);
2859 if (product / (to_len-1) != count) {
2860 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2861 return NULL;
2862 }
2863 result_len = self_len + product;
2864 if (result_len < 0) {
2865 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2866 return NULL;
2867 }
2868
2869 if ( (result = (PyStringObject *)
2870 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2871 return NULL;
2872 result_s = PyString_AS_STRING(result);
2873
2874 start = self_s;
2875 end = self_s + self_len;
2876 while (count-- > 0) {
2877 next = findchar(start, end-start, from_c);
2878 if (next == NULL)
2879 break;
2880
2881 if (next == start) {
2882 /* replace with the 'to' */
2883 memcpy(result_s, to_s, to_len);
2884 result_s += to_len;
2885 start += 1;
2886 } else {
2887 /* copy the unchanged old then the 'to' */
2888 memcpy(result_s, start, next-start);
2889 result_s += (next-start);
2890 memcpy(result_s, to_s, to_len);
2891 result_s += to_len;
2892 start = next+1;
2893 }
2894 }
2895 /* Copy the remainder of the remaining string */
2896 memcpy(result_s, start, end-start);
2897
2898 return result;
2899}
2900
2901/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002902Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002903replace_substring(PyStringObject *self,
2904 PyStringObject *from,
2905 PyStringObject *to,
2906 Py_ssize_t maxcount) {
2907 char *self_s, *from_s, *to_s, *result_s;
2908 char *start, *next, *end;
2909 Py_ssize_t self_len, from_len, to_len, result_len;
2910 Py_ssize_t count, offset, product;
2911 PyStringObject *result;
2912
2913 self_s = PyString_AS_STRING(self);
2914 self_len = PyString_GET_SIZE(self);
2915 from_s = PyString_AS_STRING(from);
2916 from_len = PyString_GET_SIZE(from);
2917
2918 count = countstring(self_s, self_len,
2919 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002920 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002921 if (count == 0) {
2922 /* no matches, return unchanged */
2923 return return_self(self);
2924 }
2925
2926 to_s = PyString_AS_STRING(to);
2927 to_len = PyString_GET_SIZE(to);
2928
2929 /* Check for overflow */
2930 /* result_len = self_len + count * (to_len-from_len) */
2931 product = count * (to_len-from_len);
2932 if (product / (to_len-from_len) != count) {
2933 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934 return NULL;
2935 }
2936 result_len = self_len + product;
2937 if (result_len < 0) {
2938 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2939 return NULL;
2940 }
2941
2942 if ( (result = (PyStringObject *)
2943 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2944 return NULL;
2945 result_s = PyString_AS_STRING(result);
2946
2947 start = self_s;
2948 end = self_s + self_len;
2949 while (count-- > 0) {
2950 offset = findstring(start, end-start,
2951 from_s, from_len,
2952 0, end-start, FORWARD);
2953 if (offset == -1)
2954 break;
2955 next = start+offset;
2956 if (next == start) {
2957 /* replace with the 'to' */
2958 memcpy(result_s, to_s, to_len);
2959 result_s += to_len;
2960 start += from_len;
2961 } else {
2962 /* copy the unchanged old then the 'to' */
2963 memcpy(result_s, start, next-start);
2964 result_s += (next-start);
2965 memcpy(result_s, to_s, to_len);
2966 result_s += to_len;
2967 start = next+from_len;
2968 }
2969 }
2970 /* Copy the remainder of the remaining string */
2971 memcpy(result_s, start, end-start);
2972
2973 return result;
2974}
2975
2976
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002977Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978replace(PyStringObject *self,
2979 PyStringObject *from,
2980 PyStringObject *to,
2981 Py_ssize_t maxcount)
2982{
2983 Py_ssize_t from_len, to_len;
2984
2985 if (maxcount < 0) {
2986 maxcount = PY_SSIZE_T_MAX;
2987 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2988 /* nothing to do; return the original string */
2989 return return_self(self);
2990 }
2991
2992 from_len = PyString_GET_SIZE(from);
2993 to_len = PyString_GET_SIZE(to);
2994
2995 if (maxcount == 0 ||
2996 (from_len == 0 && to_len == 0)) {
2997 /* nothing to do; return the original string */
2998 return return_self(self);
2999 }
3000
3001 /* Handle zero-length special cases */
3002
3003 if (from_len == 0) {
3004 /* insert the 'to' string everywhere. */
3005 /* >>> "Python".replace("", ".") */
3006 /* '.P.y.t.h.o.n.' */
3007 return replace_interleave(self, to, maxcount);
3008 }
3009
3010 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3011 /* point for an empty self string to generate a non-empty string */
3012 /* Special case so the remaining code always gets a non-empty string */
3013 if (PyString_GET_SIZE(self) == 0) {
3014 return return_self(self);
3015 }
3016
3017 if (to_len == 0) {
3018 /* delete all occurances of 'from' string */
3019 if (from_len == 1) {
3020 return replace_delete_single_character(
3021 self, PyString_AS_STRING(from)[0], maxcount);
3022 } else {
3023 return replace_delete_substring(self, from, maxcount);
3024 }
3025 }
3026
3027 /* Handle special case where both strings have the same length */
3028
3029 if (from_len == to_len) {
3030 if (from_len == 1) {
3031 return replace_single_character_in_place(
3032 self,
3033 PyString_AS_STRING(from)[0],
3034 PyString_AS_STRING(to)[0],
3035 maxcount);
3036 } else {
3037 return replace_substring_in_place(
3038 self, from, to, maxcount);
3039 }
3040 }
3041
3042 /* Otherwise use the more generic algorithms */
3043 if (from_len == 1) {
3044 return replace_single_character(self, PyString_AS_STRING(from)[0],
3045 to, maxcount);
3046 } else {
3047 /* len('from')>=2, len('to')>=1 */
3048 return replace_substring(self, from, to, maxcount);
3049 }
3050}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003051
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003052PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003053"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054\n\
3055Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003056old replaced by new. If the optional argument count is\n\
3057given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003058
3059static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003060string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003061{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003062 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003063 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003064 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003065 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003066
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003067 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003068 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003070 if (PyString_Check(from)) {
3071 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003072 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003073#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003074 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003075 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003076 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003077#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003078 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003079 return NULL;
3080
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003081 if (PyString_Check(to)) {
3082 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003083 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003084#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003085 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003086 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003087 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003088#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003089 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090 return NULL;
3091
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003092 return (PyObject *)replace((PyStringObject *) self,
3093 (PyStringObject *) from,
3094 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003095}
3096
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003097/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003098
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003099PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003100"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003101\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003102Return True if S starts with the specified prefix, False otherwise.\n\
3103With optional start, test S beginning at that position.\n\
3104With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003105
3106static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003107string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003108{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003109 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003110 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003111 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003112 Py_ssize_t plen;
3113 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003114 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003115 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003116
Guido van Rossumc6821402000-05-08 14:08:05 +00003117 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3118 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003119 return NULL;
3120 if (PyString_Check(subobj)) {
3121 prefix = PyString_AS_STRING(subobj);
3122 plen = PyString_GET_SIZE(subobj);
3123 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003124#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003125 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003126 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003127 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003128 subobj, start, end, -1);
3129 if (rc == -1)
3130 return NULL;
3131 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003132 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003133 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003134#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003135 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003136 return NULL;
3137
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003138 string_adjust_indices(&start, &end, len);
3139
3140 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003141 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003142
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003143 if (end-start >= plen)
3144 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3145 else
3146 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003147}
3148
3149
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003150PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003151"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003152\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003153Return True if S ends with the specified suffix, False otherwise.\n\
3154With optional start, test S beginning at that position.\n\
3155With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003156
3157static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003158string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003159{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003160 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003161 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003162 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003163 Py_ssize_t slen;
3164 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003165 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003166 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003167
Guido van Rossumc6821402000-05-08 14:08:05 +00003168 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3169 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003170 return NULL;
3171 if (PyString_Check(subobj)) {
3172 suffix = PyString_AS_STRING(subobj);
3173 slen = PyString_GET_SIZE(subobj);
3174 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003175#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003176 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003177 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003178 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003179 subobj, start, end, +1);
3180 if (rc == -1)
3181 return NULL;
3182 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003183 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003184 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003185#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003187 return NULL;
3188
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003189 string_adjust_indices(&start, &end, len);
3190
3191 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003192 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003193
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003194 if (end-slen > start)
3195 start = end - slen;
3196 if (end-start >= slen)
3197 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3198 else
3199 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003200}
3201
3202
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003203PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003204"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003205\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003206Encodes S using the codec registered for encoding. encoding defaults\n\
3207to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003208handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003209a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3210'xmlcharrefreplace' as well as any other name registered with\n\
3211codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003212
3213static PyObject *
3214string_encode(PyStringObject *self, PyObject *args)
3215{
3216 char *encoding = NULL;
3217 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003218 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003219
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003220 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3221 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003222 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003223 if (v == NULL)
3224 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003225 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3226 PyErr_Format(PyExc_TypeError,
3227 "encoder did not return a string/unicode object "
3228 "(type=%.400s)",
3229 v->ob_type->tp_name);
3230 Py_DECREF(v);
3231 return NULL;
3232 }
3233 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003234
3235 onError:
3236 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003237}
3238
3239
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003240PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003241"S.decode([encoding[,errors]]) -> object\n\
3242\n\
3243Decodes S using the codec registered for encoding. encoding defaults\n\
3244to the default encoding. errors may be given to set a different error\n\
3245handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003246a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3247as well as any other name registerd with codecs.register_error that is\n\
3248able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003249
3250static PyObject *
3251string_decode(PyStringObject *self, PyObject *args)
3252{
3253 char *encoding = NULL;
3254 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003255 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003256
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003257 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3258 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003259 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003260 if (v == NULL)
3261 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003262 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3263 PyErr_Format(PyExc_TypeError,
3264 "decoder did not return a string/unicode object "
3265 "(type=%.400s)",
3266 v->ob_type->tp_name);
3267 Py_DECREF(v);
3268 return NULL;
3269 }
3270 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003271
3272 onError:
3273 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003274}
3275
3276
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003277PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003278"S.expandtabs([tabsize]) -> string\n\
3279\n\
3280Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003281If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003282
3283static PyObject*
3284string_expandtabs(PyStringObject *self, PyObject *args)
3285{
3286 const char *e, *p;
3287 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003288 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003289 PyObject *u;
3290 int tabsize = 8;
3291
3292 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3293 return NULL;
3294
Thomas Wouters7e474022000-07-16 12:04:32 +00003295 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003296 i = j = 0;
3297 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3298 for (p = PyString_AS_STRING(self); p < e; p++)
3299 if (*p == '\t') {
3300 if (tabsize > 0)
3301 j += tabsize - (j % tabsize);
3302 }
3303 else {
3304 j++;
3305 if (*p == '\n' || *p == '\r') {
3306 i += j;
3307 j = 0;
3308 }
3309 }
3310
3311 /* Second pass: create output string and fill it */
3312 u = PyString_FromStringAndSize(NULL, i + j);
3313 if (!u)
3314 return NULL;
3315
3316 j = 0;
3317 q = PyString_AS_STRING(u);
3318
3319 for (p = PyString_AS_STRING(self); p < e; p++)
3320 if (*p == '\t') {
3321 if (tabsize > 0) {
3322 i = tabsize - (j % tabsize);
3323 j += i;
3324 while (i--)
3325 *q++ = ' ';
3326 }
3327 }
3328 else {
3329 j++;
3330 *q++ = *p;
3331 if (*p == '\n' || *p == '\r')
3332 j = 0;
3333 }
3334
3335 return u;
3336}
3337
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003338Py_LOCAL(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003339pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003340{
3341 PyObject *u;
3342
3343 if (left < 0)
3344 left = 0;
3345 if (right < 0)
3346 right = 0;
3347
Tim Peters8fa5dd02001-09-12 02:18:30 +00003348 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003349 Py_INCREF(self);
3350 return (PyObject *)self;
3351 }
3352
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003353 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003354 left + PyString_GET_SIZE(self) + right);
3355 if (u) {
3356 if (left)
3357 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003358 memcpy(PyString_AS_STRING(u) + left,
3359 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003360 PyString_GET_SIZE(self));
3361 if (right)
3362 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3363 fill, right);
3364 }
3365
3366 return u;
3367}
3368
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003369PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003370"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003371"\n"
3372"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003373"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003374
3375static PyObject *
3376string_ljust(PyStringObject *self, PyObject *args)
3377{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003378 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003379 char fillchar = ' ';
3380
Thomas Wouters4abb3662006-04-19 14:50:15 +00003381 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003382 return NULL;
3383
Tim Peters8fa5dd02001-09-12 02:18:30 +00003384 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003385 Py_INCREF(self);
3386 return (PyObject*) self;
3387 }
3388
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003389 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003390}
3391
3392
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003393PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003394"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003395"\n"
3396"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003397"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003398
3399static PyObject *
3400string_rjust(PyStringObject *self, PyObject *args)
3401{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003402 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003403 char fillchar = ' ';
3404
Thomas Wouters4abb3662006-04-19 14:50:15 +00003405 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406 return NULL;
3407
Tim Peters8fa5dd02001-09-12 02:18:30 +00003408 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003409 Py_INCREF(self);
3410 return (PyObject*) self;
3411 }
3412
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003413 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414}
3415
3416
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003417PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003418"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003419"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003420"Return S centered in a string of length width. Padding is\n"
3421"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422
3423static PyObject *
3424string_center(PyStringObject *self, PyObject *args)
3425{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003426 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003427 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003428 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003429
Thomas Wouters4abb3662006-04-19 14:50:15 +00003430 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431 return NULL;
3432
Tim Peters8fa5dd02001-09-12 02:18:30 +00003433 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003434 Py_INCREF(self);
3435 return (PyObject*) self;
3436 }
3437
3438 marg = width - PyString_GET_SIZE(self);
3439 left = marg / 2 + (marg & width & 1);
3440
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003441 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003442}
3443
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003444PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003445"S.zfill(width) -> string\n"
3446"\n"
3447"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003448"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003449
3450static PyObject *
3451string_zfill(PyStringObject *self, PyObject *args)
3452{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003453 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003454 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003455 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003456 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003457
Thomas Wouters4abb3662006-04-19 14:50:15 +00003458 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003459 return NULL;
3460
3461 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003462 if (PyString_CheckExact(self)) {
3463 Py_INCREF(self);
3464 return (PyObject*) self;
3465 }
3466 else
3467 return PyString_FromStringAndSize(
3468 PyString_AS_STRING(self),
3469 PyString_GET_SIZE(self)
3470 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003471 }
3472
3473 fill = width - PyString_GET_SIZE(self);
3474
3475 s = pad(self, fill, 0, '0');
3476
3477 if (s == NULL)
3478 return NULL;
3479
3480 p = PyString_AS_STRING(s);
3481 if (p[fill] == '+' || p[fill] == '-') {
3482 /* move sign to beginning of string */
3483 p[0] = p[fill];
3484 p[fill] = '0';
3485 }
3486
3487 return (PyObject*) s;
3488}
3489
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003490PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003491"S.isspace() -> bool\n\
3492\n\
3493Return True if all characters in S are whitespace\n\
3494and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003495
3496static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003497string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003498{
Fred Drakeba096332000-07-09 07:04:36 +00003499 register const unsigned char *p
3500 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003501 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003502
Guido van Rossum4c08d552000-03-10 22:55:18 +00003503 /* Shortcut for single character strings */
3504 if (PyString_GET_SIZE(self) == 1 &&
3505 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003506 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003507
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003508 /* Special case for empty strings */
3509 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003510 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003511
Guido van Rossum4c08d552000-03-10 22:55:18 +00003512 e = p + PyString_GET_SIZE(self);
3513 for (; p < e; p++) {
3514 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003515 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003516 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003517 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518}
3519
3520
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003521PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003522"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003523\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003524Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003525and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003526
3527static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003528string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003529{
Fred Drakeba096332000-07-09 07:04:36 +00003530 register const unsigned char *p
3531 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003532 register const unsigned char *e;
3533
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003534 /* Shortcut for single character strings */
3535 if (PyString_GET_SIZE(self) == 1 &&
3536 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003537 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003538
3539 /* Special case for empty strings */
3540 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003541 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003542
3543 e = p + PyString_GET_SIZE(self);
3544 for (; p < e; p++) {
3545 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003546 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003547 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003548 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003549}
3550
3551
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003552PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003553"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003554\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003555Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003556and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003557
3558static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003559string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003560{
Fred Drakeba096332000-07-09 07:04:36 +00003561 register const unsigned char *p
3562 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003563 register const unsigned char *e;
3564
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003565 /* Shortcut for single character strings */
3566 if (PyString_GET_SIZE(self) == 1 &&
3567 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003568 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003569
3570 /* Special case for empty strings */
3571 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003572 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003573
3574 e = p + PyString_GET_SIZE(self);
3575 for (; p < e; p++) {
3576 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003577 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003578 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003579 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003580}
3581
3582
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003583PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003584"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003585\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003586Return True if all characters in S are digits\n\
3587and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003588
3589static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003590string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003591{
Fred Drakeba096332000-07-09 07:04:36 +00003592 register const unsigned char *p
3593 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003594 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003595
Guido van Rossum4c08d552000-03-10 22:55:18 +00003596 /* Shortcut for single character strings */
3597 if (PyString_GET_SIZE(self) == 1 &&
3598 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003599 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003600
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003601 /* Special case for empty strings */
3602 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003603 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003604
Guido van Rossum4c08d552000-03-10 22:55:18 +00003605 e = p + PyString_GET_SIZE(self);
3606 for (; p < e; p++) {
3607 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003608 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003609 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003610 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003611}
3612
3613
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003614PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003615"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003616\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003618at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003619
3620static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003621string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622{
Fred Drakeba096332000-07-09 07:04:36 +00003623 register const unsigned char *p
3624 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003625 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003626 int cased;
3627
Guido van Rossum4c08d552000-03-10 22:55:18 +00003628 /* Shortcut for single character strings */
3629 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003630 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003631
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003632 /* Special case for empty strings */
3633 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003634 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003635
Guido van Rossum4c08d552000-03-10 22:55:18 +00003636 e = p + PyString_GET_SIZE(self);
3637 cased = 0;
3638 for (; p < e; p++) {
3639 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003640 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641 else if (!cased && islower(*p))
3642 cased = 1;
3643 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003645}
3646
3647
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003648PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003649"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003651Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003652at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003653
3654static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003655string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656{
Fred Drakeba096332000-07-09 07:04:36 +00003657 register const unsigned char *p
3658 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003659 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660 int cased;
3661
Guido van Rossum4c08d552000-03-10 22:55:18 +00003662 /* Shortcut for single character strings */
3663 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003664 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003665
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003666 /* Special case for empty strings */
3667 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003668 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003669
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670 e = p + PyString_GET_SIZE(self);
3671 cased = 0;
3672 for (; p < e; p++) {
3673 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003674 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003675 else if (!cased && isupper(*p))
3676 cased = 1;
3677 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003678 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003679}
3680
3681
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003682PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003683"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003684\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003685Return True if S is a titlecased string and there is at least one\n\
3686character in S, i.e. uppercase characters may only follow uncased\n\
3687characters and lowercase characters only cased ones. Return False\n\
3688otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689
3690static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003691string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692{
Fred Drakeba096332000-07-09 07:04:36 +00003693 register const unsigned char *p
3694 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003695 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696 int cased, previous_is_cased;
3697
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698 /* Shortcut for single character strings */
3699 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003700 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003701
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003702 /* Special case for empty strings */
3703 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003704 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003705
Guido van Rossum4c08d552000-03-10 22:55:18 +00003706 e = p + PyString_GET_SIZE(self);
3707 cased = 0;
3708 previous_is_cased = 0;
3709 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003710 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711
3712 if (isupper(ch)) {
3713 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003714 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715 previous_is_cased = 1;
3716 cased = 1;
3717 }
3718 else if (islower(ch)) {
3719 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721 previous_is_cased = 1;
3722 cased = 1;
3723 }
3724 else
3725 previous_is_cased = 0;
3726 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003727 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728}
3729
3730
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003731PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003732"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733\n\
3734Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003735Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003736is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738static PyObject*
3739string_splitlines(PyStringObject *self, PyObject *args)
3740{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003741 register Py_ssize_t i;
3742 register Py_ssize_t j;
3743 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003744 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003745 PyObject *list;
3746 PyObject *str;
3747 char *data;
3748
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003749 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750 return NULL;
3751
3752 data = PyString_AS_STRING(self);
3753 len = PyString_GET_SIZE(self);
3754
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003755 /* This does not use the preallocated list because splitlines is
3756 usually run with hundreds of newlines. The overhead of
3757 switching between PyList_SET_ITEM and append causes about a
3758 2-3% slowdown for that common case. A smarter implementation
3759 could move the if check out, so the SET_ITEMs are done first
3760 and the appends only done when the prealloc buffer is full.
3761 That's too much work for little gain.*/
3762
Guido van Rossum4c08d552000-03-10 22:55:18 +00003763 list = PyList_New(0);
3764 if (!list)
3765 goto onError;
3766
3767 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003768 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003769
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770 /* Find a line and append it */
3771 while (i < len && data[i] != '\n' && data[i] != '\r')
3772 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773
3774 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003775 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003776 if (i < len) {
3777 if (data[i] == '\r' && i + 1 < len &&
3778 data[i+1] == '\n')
3779 i += 2;
3780 else
3781 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003782 if (keepends)
3783 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003784 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003785 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786 j = i;
3787 }
3788 if (j < len) {
3789 SPLIT_APPEND(data, j, len);
3790 }
3791
3792 return list;
3793
3794 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003795 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003796 return NULL;
3797}
3798
3799#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003800#undef SPLIT_ADD
3801#undef MAX_PREALLOC
3802#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003803
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003804static PyObject *
3805string_getnewargs(PyStringObject *v)
3806{
3807 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3808}
3809
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003810
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003811static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003812string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813 /* Counterparts of the obsolete stropmodule functions; except
3814 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003815 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3816 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003817 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003818 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3819 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003820 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3821 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3822 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3823 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3824 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3825 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3826 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003827 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3828 capitalize__doc__},
3829 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3830 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3831 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003832 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003833 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3834 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3835 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3836 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3837 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3838 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3839 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003840 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3841 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003842 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3843 startswith__doc__},
3844 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3845 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3846 swapcase__doc__},
3847 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3848 translate__doc__},
3849 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3850 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3851 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3852 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3853 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3854 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3855 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3856 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3857 expandtabs__doc__},
3858 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3859 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003860 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003861 {NULL, NULL} /* sentinel */
3862};
3863
Jeremy Hylton938ace62002-07-17 16:30:39 +00003864static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003865str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3866
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003867static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003868string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003869{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003870 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003871 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003872
Guido van Rossumae960af2001-08-30 03:11:59 +00003873 if (type != &PyString_Type)
3874 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003875 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3876 return NULL;
3877 if (x == NULL)
3878 return PyString_FromString("");
3879 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003880}
3881
Guido van Rossumae960af2001-08-30 03:11:59 +00003882static PyObject *
3883str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3884{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003885 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003886 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003887
3888 assert(PyType_IsSubtype(type, &PyString_Type));
3889 tmp = string_new(&PyString_Type, args, kwds);
3890 if (tmp == NULL)
3891 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003892 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003893 n = PyString_GET_SIZE(tmp);
3894 pnew = type->tp_alloc(type, n);
3895 if (pnew != NULL) {
3896 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003897 ((PyStringObject *)pnew)->ob_shash =
3898 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003899 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003900 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003901 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003902 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003903}
3904
Guido van Rossumcacfc072002-05-24 19:01:59 +00003905static PyObject *
3906basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3907{
3908 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003909 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003910 return NULL;
3911}
3912
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003913static PyObject *
3914string_mod(PyObject *v, PyObject *w)
3915{
3916 if (!PyString_Check(v)) {
3917 Py_INCREF(Py_NotImplemented);
3918 return Py_NotImplemented;
3919 }
3920 return PyString_Format(v, w);
3921}
3922
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003923PyDoc_STRVAR(basestring_doc,
3924"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003925
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003926static PyNumberMethods string_as_number = {
3927 0, /*nb_add*/
3928 0, /*nb_subtract*/
3929 0, /*nb_multiply*/
3930 0, /*nb_divide*/
3931 string_mod, /*nb_remainder*/
3932};
3933
3934
Guido van Rossumcacfc072002-05-24 19:01:59 +00003935PyTypeObject PyBaseString_Type = {
3936 PyObject_HEAD_INIT(&PyType_Type)
3937 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003938 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003939 0,
3940 0,
3941 0, /* tp_dealloc */
3942 0, /* tp_print */
3943 0, /* tp_getattr */
3944 0, /* tp_setattr */
3945 0, /* tp_compare */
3946 0, /* tp_repr */
3947 0, /* tp_as_number */
3948 0, /* tp_as_sequence */
3949 0, /* tp_as_mapping */
3950 0, /* tp_hash */
3951 0, /* tp_call */
3952 0, /* tp_str */
3953 0, /* tp_getattro */
3954 0, /* tp_setattro */
3955 0, /* tp_as_buffer */
3956 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3957 basestring_doc, /* tp_doc */
3958 0, /* tp_traverse */
3959 0, /* tp_clear */
3960 0, /* tp_richcompare */
3961 0, /* tp_weaklistoffset */
3962 0, /* tp_iter */
3963 0, /* tp_iternext */
3964 0, /* tp_methods */
3965 0, /* tp_members */
3966 0, /* tp_getset */
3967 &PyBaseObject_Type, /* tp_base */
3968 0, /* tp_dict */
3969 0, /* tp_descr_get */
3970 0, /* tp_descr_set */
3971 0, /* tp_dictoffset */
3972 0, /* tp_init */
3973 0, /* tp_alloc */
3974 basestring_new, /* tp_new */
3975 0, /* tp_free */
3976};
3977
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003978PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003979"str(object) -> string\n\
3980\n\
3981Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003982If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003983
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003984PyTypeObject PyString_Type = {
3985 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003986 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003987 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003988 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003989 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00003990 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003991 (printfunc)string_print, /* tp_print */
3992 0, /* tp_getattr */
3993 0, /* tp_setattr */
3994 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00003995 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003996 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003997 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003998 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003999 (hashfunc)string_hash, /* tp_hash */
4000 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004001 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004002 PyObject_GenericGetAttr, /* tp_getattro */
4003 0, /* tp_setattro */
4004 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004005 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004006 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004007 string_doc, /* tp_doc */
4008 0, /* tp_traverse */
4009 0, /* tp_clear */
4010 (richcmpfunc)string_richcompare, /* tp_richcompare */
4011 0, /* tp_weaklistoffset */
4012 0, /* tp_iter */
4013 0, /* tp_iternext */
4014 string_methods, /* tp_methods */
4015 0, /* tp_members */
4016 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004017 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004018 0, /* tp_dict */
4019 0, /* tp_descr_get */
4020 0, /* tp_descr_set */
4021 0, /* tp_dictoffset */
4022 0, /* tp_init */
4023 0, /* tp_alloc */
4024 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004025 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004026};
4027
4028void
Fred Drakeba096332000-07-09 07:04:36 +00004029PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004030{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004031 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004032 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004033 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004034 if (w == NULL || !PyString_Check(*pv)) {
4035 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004036 *pv = NULL;
4037 return;
4038 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004039 v = string_concat((PyStringObject *) *pv, w);
4040 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004041 *pv = v;
4042}
4043
Guido van Rossum013142a1994-08-30 08:19:36 +00004044void
Fred Drakeba096332000-07-09 07:04:36 +00004045PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004046{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004047 PyString_Concat(pv, w);
4048 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004049}
4050
4051
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004052/* The following function breaks the notion that strings are immutable:
4053 it changes the size of a string. We get away with this only if there
4054 is only one module referencing the object. You can also think of it
4055 as creating a new string object and destroying the old one, only
4056 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004057 already be known to some other part of the code...
4058 Note that if there's not enough memory to resize the string, the original
4059 string object at *pv is deallocated, *pv is set to NULL, an "out of
4060 memory" exception is set, and -1 is returned. Else (on success) 0 is
4061 returned, and the value in *pv may or may not be the same as on input.
4062 As always, an extra byte is allocated for a trailing \0 byte (newsize
4063 does *not* include that), and a trailing \0 byte is stored.
4064*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004065
4066int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004067_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004068{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004069 register PyObject *v;
4070 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004071 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004072 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4073 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004074 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004075 Py_DECREF(v);
4076 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004077 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004078 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004079 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004080 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004081 _Py_ForgetReference(v);
4082 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004083 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004084 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004085 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004086 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004087 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004088 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004089 _Py_NewReference(*pv);
4090 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004091 sv->ob_size = newsize;
4092 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004093 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004094 return 0;
4095}
Guido van Rossume5372401993-03-16 12:15:04 +00004096
4097/* Helpers for formatstring */
4098
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004099Py_LOCAL(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004100getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004101{
Thomas Wouters977485d2006-02-16 15:59:12 +00004102 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004103 if (argidx < arglen) {
4104 (*p_argidx)++;
4105 if (arglen < 0)
4106 return args;
4107 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004108 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004109 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004110 PyErr_SetString(PyExc_TypeError,
4111 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004112 return NULL;
4113}
4114
/* Format flag bits, one per printf-style flag character. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4127
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004128Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004129formatfloat(char *buf, size_t buflen, int flags,
4130 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004131{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004132 /* fmt = '%#.' + `prec` + `type`
4133 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004134 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004135 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004136 x = PyFloat_AsDouble(v);
4137 if (x == -1.0 && PyErr_Occurred()) {
4138 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004139 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004140 }
Guido van Rossume5372401993-03-16 12:15:04 +00004141 if (prec < 0)
4142 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004143 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4144 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004145 /* Worst case length calc to ensure no buffer overrun:
4146
4147 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004148 fmt = %#.<prec>g
4149 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004150 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004151 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004152
4153 'f' formats:
4154 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4155 len = 1 + 50 + 1 + prec = 52 + prec
4156
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004157 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004158 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004159
4160 */
4161 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4162 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004163 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004164 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004165 return -1;
4166 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004167 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4168 (flags&F_ALT) ? "#" : "",
4169 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004170 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004171 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004172}
4173
Tim Peters38fd5b62000-09-21 05:43:11 +00004174/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4175 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4176 * Python's regular ints.
4177 * Return value: a new PyString*, or NULL if error.
4178 * . *pbuf is set to point into it,
4179 * *plen set to the # of chars following that.
4180 * Caller must decref it when done using pbuf.
4181 * The string starting at *pbuf is of the form
4182 * "-"? ("0x" | "0X")? digit+
4183 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004184 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004185 * There will be at least prec digits, zero-filled on the left if
4186 * necessary to get that many.
4187 * val object to be converted
4188 * flags bitmask of format flags; only F_ALT is looked at
4189 * prec minimum number of digits; 0-fill on left if needed
4190 * type a character in [duoxX]; u acts the same as d
4191 *
4192 * CAUTION: o, x and X conversions on regular ints can never
4193 * produce a '-' sign, but can for Python's unbounded ints.
4194 */
4195PyObject*
4196_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4197 char **pbuf, int *plen)
4198{
4199 PyObject *result = NULL;
4200 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004201 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004202 int sign; /* 1 if '-', else 0 */
4203 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004204 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004205 int numdigits; /* len == numnondigits + numdigits */
4206 int numnondigits = 0;
4207
4208 switch (type) {
4209 case 'd':
4210 case 'u':
4211 result = val->ob_type->tp_str(val);
4212 break;
4213 case 'o':
4214 result = val->ob_type->tp_as_number->nb_oct(val);
4215 break;
4216 case 'x':
4217 case 'X':
4218 numnondigits = 2;
4219 result = val->ob_type->tp_as_number->nb_hex(val);
4220 break;
4221 default:
4222 assert(!"'type' not in [duoxX]");
4223 }
4224 if (!result)
4225 return NULL;
4226
4227 /* To modify the string in-place, there can only be one reference. */
4228 if (result->ob_refcnt != 1) {
4229 PyErr_BadInternalCall();
4230 return NULL;
4231 }
4232 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004233 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004234 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004235 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4236 return NULL;
4237 }
4238 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004239 if (buf[len-1] == 'L') {
4240 --len;
4241 buf[len] = '\0';
4242 }
4243 sign = buf[0] == '-';
4244 numnondigits += sign;
4245 numdigits = len - numnondigits;
4246 assert(numdigits > 0);
4247
Tim Petersfff53252001-04-12 18:38:48 +00004248 /* Get rid of base marker unless F_ALT */
4249 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004250 /* Need to skip 0x, 0X or 0. */
4251 int skipped = 0;
4252 switch (type) {
4253 case 'o':
4254 assert(buf[sign] == '0');
4255 /* If 0 is only digit, leave it alone. */
4256 if (numdigits > 1) {
4257 skipped = 1;
4258 --numdigits;
4259 }
4260 break;
4261 case 'x':
4262 case 'X':
4263 assert(buf[sign] == '0');
4264 assert(buf[sign + 1] == 'x');
4265 skipped = 2;
4266 numnondigits -= 2;
4267 break;
4268 }
4269 if (skipped) {
4270 buf += skipped;
4271 len -= skipped;
4272 if (sign)
4273 buf[0] = '-';
4274 }
4275 assert(len == numnondigits + numdigits);
4276 assert(numdigits > 0);
4277 }
4278
4279 /* Fill with leading zeroes to meet minimum width. */
4280 if (prec > numdigits) {
4281 PyObject *r1 = PyString_FromStringAndSize(NULL,
4282 numnondigits + prec);
4283 char *b1;
4284 if (!r1) {
4285 Py_DECREF(result);
4286 return NULL;
4287 }
4288 b1 = PyString_AS_STRING(r1);
4289 for (i = 0; i < numnondigits; ++i)
4290 *b1++ = *buf++;
4291 for (i = 0; i < prec - numdigits; i++)
4292 *b1++ = '0';
4293 for (i = 0; i < numdigits; i++)
4294 *b1++ = *buf++;
4295 *b1 = '\0';
4296 Py_DECREF(result);
4297 result = r1;
4298 buf = PyString_AS_STRING(result);
4299 len = numnondigits + prec;
4300 }
4301
4302 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004303 if (type == 'X') {
4304 /* Need to convert all lower case letters to upper case.
4305 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004306 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004307 if (buf[i] >= 'a' && buf[i] <= 'x')
4308 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004309 }
4310 *pbuf = buf;
4311 *plen = len;
4312 return result;
4313}
4314
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004315Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004316formatint(char *buf, size_t buflen, int flags,
4317 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004318{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004319 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004320 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4321 + 1 + 1 = 24 */
4322 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004323 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004324 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004325
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004326 x = PyInt_AsLong(v);
4327 if (x == -1 && PyErr_Occurred()) {
4328 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004329 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004330 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004331 if (x < 0 && type == 'u') {
4332 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004333 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004334 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4335 sign = "-";
4336 else
4337 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004338 if (prec < 0)
4339 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004340
4341 if ((flags & F_ALT) &&
4342 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004343 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004344 * of issues that cause pain:
4345 * - when 0 is being converted, the C standard leaves off
4346 * the '0x' or '0X', which is inconsistent with other
4347 * %#x/%#X conversions and inconsistent with Python's
4348 * hex() function
4349 * - there are platforms that violate the standard and
4350 * convert 0 with the '0x' or '0X'
4351 * (Metrowerks, Compaq Tru64)
4352 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004353 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004354 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004355 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004356 * We can achieve the desired consistency by inserting our
4357 * own '0x' or '0X' prefix, and substituting %x/%X in place
4358 * of %#x/%#X.
4359 *
4360 * Note that this is the same approach as used in
4361 * formatint() in unicodeobject.c
4362 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004363 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4364 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004365 }
4366 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004367 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4368 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004369 prec, type);
4370 }
4371
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004372 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4373 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004374 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004375 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004376 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004377 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004378 return -1;
4379 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004380 if (sign[0])
4381 PyOS_snprintf(buf, buflen, fmt, -x);
4382 else
4383 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004384 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004385}
4386
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004387Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004388formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004389{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004390 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004391 if (PyString_Check(v)) {
4392 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004393 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004394 }
4395 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004396 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004397 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004398 }
4399 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004400 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004401}
4402
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. `sizeof FORMATBUFLEN').
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004412
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004413PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004414PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004415{
4416 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004417 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004418 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004419 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004420 PyObject *result, *orig_args;
4421#ifdef Py_USING_UNICODE
4422 PyObject *v, *w;
4423#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004424 PyObject *dict = NULL;
4425 if (format == NULL || !PyString_Check(format) || args == NULL) {
4426 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004427 return NULL;
4428 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004429 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004430 fmt = PyString_AS_STRING(format);
4431 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004432 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004433 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004434 if (result == NULL)
4435 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004436 res = PyString_AsString(result);
4437 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004438 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004439 argidx = 0;
4440 }
4441 else {
4442 arglen = -1;
4443 argidx = -2;
4444 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004445 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4446 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004447 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004448 while (--fmtcnt >= 0) {
4449 if (*fmt != '%') {
4450 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004451 rescnt = fmtcnt + 100;
4452 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004453 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004454 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004455 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004456 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004457 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004458 }
4459 *res++ = *fmt++;
4460 }
4461 else {
4462 /* Got a format specifier */
4463 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004464 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004465 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004466 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004467 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004468 PyObject *v = NULL;
4469 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004470 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004471 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004472 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004473 char formatbuf[FORMATBUFLEN];
4474 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004475#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004476 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004477 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004478#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004479
Guido van Rossumda9c2711996-12-05 21:58:58 +00004480 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004481 if (*fmt == '(') {
4482 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004483 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004484 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004485 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004486
4487 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004488 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004489 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004490 goto error;
4491 }
4492 ++fmt;
4493 --fmtcnt;
4494 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004495 /* Skip over balanced parentheses */
4496 while (pcount > 0 && --fmtcnt >= 0) {
4497 if (*fmt == ')')
4498 --pcount;
4499 else if (*fmt == '(')
4500 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004501 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004502 }
4503 keylen = fmt - keystart - 1;
4504 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004505 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004506 "incomplete format key");
4507 goto error;
4508 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004509 key = PyString_FromStringAndSize(keystart,
4510 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004511 if (key == NULL)
4512 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004513 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004514 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004515 args_owned = 0;
4516 }
4517 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004518 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004519 if (args == NULL) {
4520 goto error;
4521 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004522 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004523 arglen = -1;
4524 argidx = -2;
4525 }
Guido van Rossume5372401993-03-16 12:15:04 +00004526 while (--fmtcnt >= 0) {
4527 switch (c = *fmt++) {
4528 case '-': flags |= F_LJUST; continue;
4529 case '+': flags |= F_SIGN; continue;
4530 case ' ': flags |= F_BLANK; continue;
4531 case '#': flags |= F_ALT; continue;
4532 case '0': flags |= F_ZERO; continue;
4533 }
4534 break;
4535 }
4536 if (c == '*') {
4537 v = getnextarg(args, arglen, &argidx);
4538 if (v == NULL)
4539 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004540 if (!PyInt_Check(v)) {
4541 PyErr_SetString(PyExc_TypeError,
4542 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004543 goto error;
4544 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004545 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004546 if (width < 0) {
4547 flags |= F_LJUST;
4548 width = -width;
4549 }
Guido van Rossume5372401993-03-16 12:15:04 +00004550 if (--fmtcnt >= 0)
4551 c = *fmt++;
4552 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004553 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004554 width = c - '0';
4555 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004556 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004557 if (!isdigit(c))
4558 break;
4559 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004560 PyErr_SetString(
4561 PyExc_ValueError,
4562 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004563 goto error;
4564 }
4565 width = width*10 + (c - '0');
4566 }
4567 }
4568 if (c == '.') {
4569 prec = 0;
4570 if (--fmtcnt >= 0)
4571 c = *fmt++;
4572 if (c == '*') {
4573 v = getnextarg(args, arglen, &argidx);
4574 if (v == NULL)
4575 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004576 if (!PyInt_Check(v)) {
4577 PyErr_SetString(
4578 PyExc_TypeError,
4579 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004580 goto error;
4581 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004582 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004583 if (prec < 0)
4584 prec = 0;
4585 if (--fmtcnt >= 0)
4586 c = *fmt++;
4587 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004588 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004589 prec = c - '0';
4590 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004591 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004592 if (!isdigit(c))
4593 break;
4594 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004595 PyErr_SetString(
4596 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004597 "prec too big");
4598 goto error;
4599 }
4600 prec = prec*10 + (c - '0');
4601 }
4602 }
4603 } /* prec */
4604 if (fmtcnt >= 0) {
4605 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004606 if (--fmtcnt >= 0)
4607 c = *fmt++;
4608 }
4609 }
4610 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004611 PyErr_SetString(PyExc_ValueError,
4612 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004613 goto error;
4614 }
4615 if (c != '%') {
4616 v = getnextarg(args, arglen, &argidx);
4617 if (v == NULL)
4618 goto error;
4619 }
4620 sign = 0;
4621 fill = ' ';
4622 switch (c) {
4623 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004624 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004625 len = 1;
4626 break;
4627 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004628#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004629 if (PyUnicode_Check(v)) {
4630 fmt = fmt_start;
4631 argidx = argidx_start;
4632 goto unicode;
4633 }
Georg Brandld45014b2005-10-01 17:06:00 +00004634#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004635 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004636#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004637 if (temp != NULL && PyUnicode_Check(temp)) {
4638 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004639 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004640 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004641 goto unicode;
4642 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004643#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004644 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004645 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004646 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004647 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004648 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004649 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004650 if (!PyString_Check(temp)) {
4651 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004652 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004653 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004654 goto error;
4655 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004656 pbuf = PyString_AS_STRING(temp);
4657 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004658 if (prec >= 0 && len > prec)
4659 len = prec;
4660 break;
4661 case 'i':
4662 case 'd':
4663 case 'u':
4664 case 'o':
4665 case 'x':
4666 case 'X':
4667 if (c == 'i')
4668 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004669 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004670 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004671 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004672 prec, c, &pbuf, &ilen);
4673 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004674 if (!temp)
4675 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004676 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004677 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004678 else {
4679 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004680 len = formatint(pbuf,
4681 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004682 flags, prec, c, v);
4683 if (len < 0)
4684 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004685 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004686 }
4687 if (flags & F_ZERO)
4688 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004689 break;
4690 case 'e':
4691 case 'E':
4692 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004693 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004694 case 'g':
4695 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004696 if (c == 'F')
4697 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004698 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004699 len = formatfloat(pbuf, sizeof(formatbuf),
4700 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004701 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004702 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004703 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004704 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004705 fill = '0';
4706 break;
4707 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004708#ifdef Py_USING_UNICODE
4709 if (PyUnicode_Check(v)) {
4710 fmt = fmt_start;
4711 argidx = argidx_start;
4712 goto unicode;
4713 }
4714#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004715 pbuf = formatbuf;
4716 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004717 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004718 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004719 break;
4720 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004721 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004722 "unsupported format character '%c' (0x%x) "
4723 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004724 c, c,
4725 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004726 goto error;
4727 }
4728 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004729 if (*pbuf == '-' || *pbuf == '+') {
4730 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004731 len--;
4732 }
4733 else if (flags & F_SIGN)
4734 sign = '+';
4735 else if (flags & F_BLANK)
4736 sign = ' ';
4737 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004738 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004739 }
4740 if (width < len)
4741 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004742 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004743 reslen -= rescnt;
4744 rescnt = width + fmtcnt + 100;
4745 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004746 if (reslen < 0) {
4747 Py_DECREF(result);
4748 return PyErr_NoMemory();
4749 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004750 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004751 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004752 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004753 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004754 }
4755 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004756 if (fill != ' ')
4757 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004758 rescnt--;
4759 if (width > len)
4760 width--;
4761 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004762 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4763 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004764 assert(pbuf[1] == c);
4765 if (fill != ' ') {
4766 *res++ = *pbuf++;
4767 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004768 }
Tim Petersfff53252001-04-12 18:38:48 +00004769 rescnt -= 2;
4770 width -= 2;
4771 if (width < 0)
4772 width = 0;
4773 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004774 }
4775 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004776 do {
4777 --rescnt;
4778 *res++ = fill;
4779 } while (--width > len);
4780 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004781 if (fill == ' ') {
4782 if (sign)
4783 *res++ = sign;
4784 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004785 (c == 'x' || c == 'X')) {
4786 assert(pbuf[0] == '0');
4787 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004788 *res++ = *pbuf++;
4789 *res++ = *pbuf++;
4790 }
4791 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004792 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004793 res += len;
4794 rescnt -= len;
4795 while (--width >= len) {
4796 --rescnt;
4797 *res++ = ' ';
4798 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004799 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004800 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004801 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004802 goto error;
4803 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004804 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004805 } /* '%' */
4806 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004807 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004808 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004809 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004810 goto error;
4811 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004812 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004813 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004814 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004815 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004816 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004817
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004818#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004819 unicode:
4820 if (args_owned) {
4821 Py_DECREF(args);
4822 args_owned = 0;
4823 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004824 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004825 if (PyTuple_Check(orig_args) && argidx > 0) {
4826 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004827 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004828 v = PyTuple_New(n);
4829 if (v == NULL)
4830 goto error;
4831 while (--n >= 0) {
4832 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4833 Py_INCREF(w);
4834 PyTuple_SET_ITEM(v, n, w);
4835 }
4836 args = v;
4837 } else {
4838 Py_INCREF(orig_args);
4839 args = orig_args;
4840 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004841 args_owned = 1;
4842 /* Take what we have of the result and let the Unicode formatting
4843 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004844 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004845 if (_PyString_Resize(&result, rescnt))
4846 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004847 fmtcnt = PyString_GET_SIZE(format) - \
4848 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004849 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4850 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004851 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004852 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004853 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004854 if (v == NULL)
4855 goto error;
4856 /* Paste what we have (result) to what the Unicode formatting
4857 function returned (v) and return the result (or error) */
4858 w = PyUnicode_Concat(result, v);
4859 Py_DECREF(result);
4860 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004861 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004862 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004863#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004864
Guido van Rossume5372401993-03-16 12:15:04 +00004865 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004866 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004867 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004868 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004869 }
Guido van Rossume5372401993-03-16 12:15:04 +00004870 return NULL;
4871}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004872
Guido van Rossum2a61e741997-01-18 07:55:05 +00004873void
Fred Drakeba096332000-07-09 07:04:36 +00004874PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004875{
4876 register PyStringObject *s = (PyStringObject *)(*p);
4877 PyObject *t;
4878 if (s == NULL || !PyString_Check(s))
4879 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004880 /* If it's a string subclass, we don't really know what putting
4881 it in the interned dict might do. */
4882 if (!PyString_CheckExact(s))
4883 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004884 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004885 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004886 if (interned == NULL) {
4887 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004888 if (interned == NULL) {
4889 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004890 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004891 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004892 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004893 t = PyDict_GetItem(interned, (PyObject *)s);
4894 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004895 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004896 Py_DECREF(*p);
4897 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004898 return;
4899 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004900
Armin Rigo79f7ad22004-08-07 19:27:39 +00004901 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004902 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004903 return;
4904 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004905 /* The two references in interned are not counted by refcnt.
4906 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004907 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004908 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004909}
4910
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004911void
4912PyString_InternImmortal(PyObject **p)
4913{
4914 PyString_InternInPlace(p);
4915 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4916 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4917 Py_INCREF(*p);
4918 }
4919}
4920
Guido van Rossum2a61e741997-01-18 07:55:05 +00004921
4922PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004923PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004924{
4925 PyObject *s = PyString_FromString(cp);
4926 if (s == NULL)
4927 return NULL;
4928 PyString_InternInPlace(&s);
4929 return s;
4930}
4931
Guido van Rossum8cf04761997-08-02 02:57:45 +00004932void
Fred Drakeba096332000-07-09 07:04:36 +00004933PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004934{
4935 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004936 for (i = 0; i < UCHAR_MAX + 1; i++) {
4937 Py_XDECREF(characters[i]);
4938 characters[i] = NULL;
4939 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004940 Py_XDECREF(nullstring);
4941 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004942}
Barry Warsawa903ad982001-02-23 16:40:48 +00004943
Barry Warsawa903ad982001-02-23 16:40:48 +00004944void _Py_ReleaseInternedStrings(void)
4945{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004946 PyObject *keys;
4947 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004948 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004949
4950 if (interned == NULL || !PyDict_Check(interned))
4951 return;
4952 keys = PyDict_Keys(interned);
4953 if (keys == NULL || !PyList_Check(keys)) {
4954 PyErr_Clear();
4955 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004956 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004957
4958 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4959 detector, interned strings are not forcibly deallocated; rather, we
4960 give them their stolen references back, and then clear and DECREF
4961 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004962
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004963 fprintf(stderr, "releasing interned strings\n");
4964 n = PyList_GET_SIZE(keys);
4965 for (i = 0; i < n; i++) {
4966 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4967 switch (s->ob_sstate) {
4968 case SSTATE_NOT_INTERNED:
4969 /* XXX Shouldn't happen */
4970 break;
4971 case SSTATE_INTERNED_IMMORTAL:
4972 s->ob_refcnt += 1;
4973 break;
4974 case SSTATE_INTERNED_MORTAL:
4975 s->ob_refcnt += 2;
4976 break;
4977 default:
4978 Py_FatalError("Inconsistent interned string state.");
4979 }
4980 s->ob_sstate = SSTATE_NOT_INTERNED;
4981 }
4982 Py_DECREF(keys);
4983 PyDict_Clear(interned);
4984 Py_DECREF(interned);
4985 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004986}