blob: 138ebfe938d69f8e4c31e03538c083c54998a5ad [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
26
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000054PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000055{
Tim Peters9e897f42001-05-09 07:37:07 +000056 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000057 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058 if (size == 0 && (op = nullstring) != NULL) {
59#ifdef COUNT_ALLOCS
60 null_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 if (size == 1 && str != NULL &&
66 (op = characters[*str & UCHAR_MAX]) != NULL)
67 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068#ifdef COUNT_ALLOCS
69 one_strings++;
70#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 Py_INCREF(op);
72 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000075 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000076 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000077 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000078 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000079 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000080 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000081 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (str != NULL)
83 memcpy(op->ob_sval, str, size);
84 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000085 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000086 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000087 PyObject *t = (PyObject *)op;
88 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000089 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000093 PyObject *t = (PyObject *)op;
94 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000095 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000103PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104{
Tim Peters62de65b2001-12-06 20:29:32 +0000105 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000106 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000107
108 assert(str != NULL);
109 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000110 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000111 PyErr_SetString(PyExc_OverflowError,
112 "string is too long for a Python string");
113 return NULL;
114 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 if (size == 0 && (op = nullstring) != NULL) {
116#ifdef COUNT_ALLOCS
117 null_strings++;
118#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000119 Py_INCREF(op);
120 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
123#ifdef COUNT_ALLOCS
124 one_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000130 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000131 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000136 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000137 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000138 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000146 PyObject *t = (PyObject *)op;
147 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000148 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
165 memcpy(count, vargs, sizeof(va_list));
166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
177 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
178 ;
179
Tim Peters8931ff12006-05-13 23:28:20 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 }
254 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
255 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000275 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 memcpy(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
355
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 const char *encoding,
359 const char *errors)
360{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361 PyObject *v, *str;
362
363 str = PyString_FromStringAndSize(s, size);
364 if (str == NULL)
365 return NULL;
366 v = PyString_AsDecodedString(str, encoding, errors);
367 Py_DECREF(str);
368 return v;
369}
370
371PyObject *PyString_AsDecodedObject(PyObject *str,
372 const char *encoding,
373 const char *errors)
374{
375 PyObject *v;
376
377 if (!PyString_Check(str)) {
378 PyErr_BadArgument();
379 goto onError;
380 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382 if (encoding == NULL) {
383#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385#else
386 PyErr_SetString(PyExc_ValueError, "no encoding specified");
387 goto onError;
388#endif
389 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000390
391 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 v = PyCodec_Decode(str, encoding, errors);
393 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000394 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395
396 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000397
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 return NULL;
400}
401
402PyObject *PyString_AsDecodedString(PyObject *str,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v;
407
408 v = PyString_AsDecodedObject(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000412#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000413 /* Convert Unicode to a string using the default encoding */
414 if (PyUnicode_Check(v)) {
415 PyObject *temp = v;
416 v = PyUnicode_AsEncodedString(v, NULL, NULL);
417 Py_DECREF(temp);
418 if (v == NULL)
419 goto onError;
420 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000421#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000422 if (!PyString_Check(v)) {
423 PyErr_Format(PyExc_TypeError,
424 "decoder did not return a string object (type=%.400s)",
425 v->ob_type->tp_name);
426 Py_DECREF(v);
427 goto onError;
428 }
429
430 return v;
431
432 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 return NULL;
434}
435
436PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000437 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 const char *encoding,
439 const char *errors)
440{
441 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000442
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 str = PyString_FromStringAndSize(s, size);
444 if (str == NULL)
445 return NULL;
446 v = PyString_AsEncodedString(str, encoding, errors);
447 Py_DECREF(str);
448 return v;
449}
450
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452 const char *encoding,
453 const char *errors)
454{
455 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000456
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 if (!PyString_Check(str)) {
458 PyErr_BadArgument();
459 goto onError;
460 }
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462 if (encoding == NULL) {
463#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000464 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465#else
466 PyErr_SetString(PyExc_ValueError, "no encoding specified");
467 goto onError;
468#endif
469 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470
471 /* Encode via the codec registry */
472 v = PyCodec_Encode(str, encoding, errors);
473 if (v == NULL)
474 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
476 return v;
477
478 onError:
479 return NULL;
480}
481
482PyObject *PyString_AsEncodedString(PyObject *str,
483 const char *encoding,
484 const char *errors)
485{
486 PyObject *v;
487
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000488 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000489 if (v == NULL)
490 goto onError;
491
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000492#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 /* Convert Unicode to a string using the default encoding */
494 if (PyUnicode_Check(v)) {
495 PyObject *temp = v;
496 v = PyUnicode_AsEncodedString(v, NULL, NULL);
497 Py_DECREF(temp);
498 if (v == NULL)
499 goto onError;
500 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000501#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000502 if (!PyString_Check(v)) {
503 PyErr_Format(PyExc_TypeError,
504 "encoder did not return a string object (type=%.400s)",
505 v->ob_type->tp_name);
506 Py_DECREF(v);
507 goto onError;
508 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000511
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000512 onError:
513 return NULL;
514}
515
Guido van Rossum234f9421993-06-17 12:35:49 +0000516static void
Fred Drakeba096332000-07-09 07:04:36 +0000517string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000518{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000519 switch (PyString_CHECK_INTERNED(op)) {
520 case SSTATE_NOT_INTERNED:
521 break;
522
523 case SSTATE_INTERNED_MORTAL:
524 /* revive dead object temporarily for DelItem */
525 op->ob_refcnt = 3;
526 if (PyDict_DelItem(interned, op) != 0)
527 Py_FatalError(
528 "deletion of interned string failed");
529 break;
530
531 case SSTATE_INTERNED_IMMORTAL:
532 Py_FatalError("Immortal interned string died.");
533
534 default:
535 Py_FatalError("Inconsistent interned string state.");
536 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000537 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000538}
539
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540/* Unescape a backslash-escaped string. If unicode is non-zero,
541 the string is a u-literal. If recode_encoding is non-zero,
542 the string is UTF-8 encoded and should be re-encoded in the
543 specified encoding. */
544
545PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000548 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 const char *recode_encoding)
550{
551 int c;
552 char *p, *buf;
553 const char *end;
554 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000556 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000557 if (v == NULL)
558 return NULL;
559 p = buf = PyString_AsString(v);
560 end = s + len;
561 while (s < end) {
562 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000563 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000564#ifdef Py_USING_UNICODE
565 if (recode_encoding && (*s & 0x80)) {
566 PyObject *u, *w;
567 char *r;
568 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000570 t = s;
571 /* Decode non-ASCII bytes as UTF-8. */
572 while (t < end && (*t & 0x80)) t++;
573 u = PyUnicode_DecodeUTF8(s, t - s, errors);
574 if(!u) goto failed;
575
576 /* Recode them in target encoding. */
577 w = PyUnicode_AsEncodedString(
578 u, recode_encoding, errors);
579 Py_DECREF(u);
580 if (!w) goto failed;
581
582 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000583 assert(PyString_Check(w));
584 r = PyString_AS_STRING(w);
585 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 memcpy(p, r, rn);
587 p += rn;
588 Py_DECREF(w);
589 s = t;
590 } else {
591 *p++ = *s++;
592 }
593#else
594 *p++ = *s++;
595#endif
596 continue;
597 }
598 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000599 if (s==end) {
600 PyErr_SetString(PyExc_ValueError,
601 "Trailing \\ in string");
602 goto failed;
603 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000604 switch (*s++) {
605 /* XXX This assumes ASCII! */
606 case '\n': break;
607 case '\\': *p++ = '\\'; break;
608 case '\'': *p++ = '\''; break;
609 case '\"': *p++ = '\"'; break;
610 case 'b': *p++ = '\b'; break;
611 case 'f': *p++ = '\014'; break; /* FF */
612 case 't': *p++ = '\t'; break;
613 case 'n': *p++ = '\n'; break;
614 case 'r': *p++ = '\r'; break;
615 case 'v': *p++ = '\013'; break; /* VT */
616 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
617 case '0': case '1': case '2': case '3':
618 case '4': case '5': case '6': case '7':
619 c = s[-1] - '0';
620 if ('0' <= *s && *s <= '7') {
621 c = (c<<3) + *s++ - '0';
622 if ('0' <= *s && *s <= '7')
623 c = (c<<3) + *s++ - '0';
624 }
625 *p++ = c;
626 break;
627 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000628 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 && isxdigit(Py_CHARMASK(s[1]))) {
630 unsigned int x = 0;
631 c = Py_CHARMASK(*s);
632 s++;
633 if (isdigit(c))
634 x = c - '0';
635 else if (islower(c))
636 x = 10 + c - 'a';
637 else
638 x = 10 + c - 'A';
639 x = x << 4;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x += c - '0';
644 else if (islower(c))
645 x += 10 + c - 'a';
646 else
647 x += 10 + c - 'A';
648 *p++ = x;
649 break;
650 }
651 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656 if (strcmp(errors, "replace") == 0) {
657 *p++ = '?';
658 } else if (strcmp(errors, "ignore") == 0)
659 /* do nothing */;
660 else {
661 PyErr_Format(PyExc_ValueError,
662 "decoding error; "
663 "unknown error handling code: %.400s",
664 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667#ifndef Py_USING_UNICODE
668 case 'u':
669 case 'U':
670 case 'N':
671 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000672 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 "Unicode escapes not legal "
674 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000675 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677#endif
678 default:
679 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000680 s--;
681 goto non_esc; /* an arbitry number of unescaped
682 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000685 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 return v;
688 failed:
689 Py_DECREF(v);
690 return NULL;
691}
692
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694string_getsize(register PyObject *op)
695{
696 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (PyString_AsStringAndSize(op, &s, &len))
699 return -1;
700 return len;
701}
702
703static /*const*/ char *
704string_getbuffer(register PyObject *op)
705{
706 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000707 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000708 if (PyString_AsStringAndSize(op, &s, &len))
709 return NULL;
710 return s;
711}
712
Martin v. Löwis18e16552006-02-15 17:27:45 +0000713Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000714PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (!PyString_Check(op))
717 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000718 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719}
720
721/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000722PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 if (!PyString_Check(op))
725 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000726 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727}
728
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729int
730PyString_AsStringAndSize(register PyObject *obj,
731 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000732 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733{
734 if (s == NULL) {
735 PyErr_BadInternalCall();
736 return -1;
737 }
738
739 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000740#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 if (PyUnicode_Check(obj)) {
742 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
743 if (obj == NULL)
744 return -1;
745 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000746 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000747#endif
748 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000749 PyErr_Format(PyExc_TypeError,
750 "expected string or Unicode object, "
751 "%.200s found", obj->ob_type->tp_name);
752 return -1;
753 }
754 }
755
756 *s = PyString_AS_STRING(obj);
757 if (len != NULL)
758 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000759 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_SetString(PyExc_TypeError,
761 "expected string without null bytes");
762 return -1;
763 }
764 return 0;
765}
766
Fredrik Lundhaf722372006-05-25 17:55:31 +0000767/* -------------------------------------------------------------------- */
Fredrik Lundha50d2012006-05-26 17:04:58 +0000768/* stringlib components */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769
Fredrik Lundha50d2012006-05-26 17:04:58 +0000770#define USE_FAST
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
Fredrik Lundha50d2012006-05-26 17:04:58 +0000772#ifdef USE_FAST
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
Fredrik Lundha50d2012006-05-26 17:04:58 +0000774#define STRINGLIB_CHAR char
Fredrik Lundhaf722372006-05-25 17:55:31 +0000775
Fredrik Lundha50d2012006-05-26 17:04:58 +0000776#include "stringlib/fastsearch.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000777
Fredrik Lundha50d2012006-05-26 17:04:58 +0000778#endif
Fredrik Lundhaf722372006-05-25 17:55:31 +0000779
780/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000781/* Methods */
782
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000783static int
Fred Drakeba096332000-07-09 07:04:36 +0000784string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000785{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000786 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000788 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000789
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000791 if (! PyString_CheckExact(op)) {
792 int ret;
793 /* A str subclass may have its own __str__ method. */
794 op = (PyStringObject *) PyObject_Str((PyObject *)op);
795 if (op == NULL)
796 return -1;
797 ret = string_print(op, fp, flags);
798 Py_DECREF(op);
799 return ret;
800 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000801 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000802#ifdef __VMS
803 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
804#else
805 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
806#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000807 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000809
Thomas Wouters7e474022000-07-16 12:04:32 +0000810 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000811 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000812 if (memchr(op->ob_sval, '\'', op->ob_size) &&
813 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814 quote = '"';
815
816 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817 for (i = 0; i < op->ob_size; i++) {
818 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000820 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000821 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000822 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000823 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000824 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000825 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 fprintf(fp, "\\r");
827 else if (c < ' ' || c >= 0x7f)
828 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000829 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000830 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000833 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834}
835
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000836PyObject *
837PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000839 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000840 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000841 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000842 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000843 PyErr_SetString(PyExc_OverflowError,
844 "string is too large to make repr");
845 }
846 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000848 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 }
850 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000851 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 register char c;
853 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000854 int quote;
855
Thomas Wouters7e474022000-07-16 12:04:32 +0000856 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000857 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000858 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000859 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000860 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000861 quote = '"';
862
Tim Peters9161c8b2001-12-03 01:55:38 +0000863 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000864 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000866 /* There's at least enough room for a hex escape
867 and a closing quote. */
868 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000870 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000872 else if (c == '\t')
873 *p++ = '\\', *p++ = 't';
874 else if (c == '\n')
875 *p++ = '\\', *p++ = 'n';
876 else if (c == '\r')
877 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000878 else if (c < ' ' || c >= 0x7f) {
879 /* For performance, we don't want to call
880 PyOS_snprintf here (extra layers of
881 function call). */
882 sprintf(p, "\\x%02x", c & 0xff);
883 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000884 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000885 else
886 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000888 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000889 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000891 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000892 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000893 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000894 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895}
896
Guido van Rossum189f1df2001-05-01 16:51:53 +0000897static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000898string_repr(PyObject *op)
899{
900 return PyString_Repr(op, 1);
901}
902
903static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000904string_str(PyObject *s)
905{
Tim Petersc9933152001-10-16 20:18:24 +0000906 assert(PyString_Check(s));
907 if (PyString_CheckExact(s)) {
908 Py_INCREF(s);
909 return s;
910 }
911 else {
912 /* Subtype -- return genuine string with the same value. */
913 PyStringObject *t = (PyStringObject *) s;
914 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
915 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000916}
917
Martin v. Löwis18e16552006-02-15 17:27:45 +0000918static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000919string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920{
921 return a->ob_size;
922}
923
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000924static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000925string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000926{
Andrew Dalke598710c2006-05-25 18:18:39 +0000927 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 register PyStringObject *op;
929 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000930#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000931 if (PyUnicode_Check(bb))
932 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000933#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000934 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000935 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000936 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937 return NULL;
938 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000939#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000941 if ((a->ob_size == 0 || b->ob_size == 0) &&
942 PyString_CheckExact(a) && PyString_CheckExact(b)) {
943 if (a->ob_size == 0) {
944 Py_INCREF(bb);
945 return bb;
946 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947 Py_INCREF(a);
948 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949 }
950 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000951 if (size < 0) {
952 PyErr_SetString(PyExc_OverflowError,
953 "strings are too large to concat");
954 return NULL;
955 }
956
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000957 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000958 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000959 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000960 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000961 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000962 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000963 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000964 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
965 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000966 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968#undef b
969}
970
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000971static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000972string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000974 register Py_ssize_t i;
975 register Py_ssize_t j;
976 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000978 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000979 if (n < 0)
980 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000981 /* watch out for overflows: the size can overflow int,
982 * and the # of bytes needed can overflow size_t
983 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000985 if (n && size / n != a->ob_size) {
986 PyErr_SetString(PyExc_OverflowError,
987 "repeated string is too long");
988 return NULL;
989 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000990 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000991 Py_INCREF(a);
992 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993 }
Tim Peterse7c05322004-06-27 17:24:49 +0000994 nbytes = (size_t)size;
995 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000996 PyErr_SetString(PyExc_OverflowError,
997 "repeated string is too long");
998 return NULL;
999 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001000 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001001 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001002 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001003 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001004 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001005 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001006 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001007 op->ob_sval[size] = '\0';
1008 if (a->ob_size == 1 && n > 0) {
1009 memset(op->ob_sval, a->ob_sval[0] , n);
1010 return (PyObject *) op;
1011 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001012 i = 0;
1013 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001014 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1015 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001016 }
1017 while (i < size) {
1018 j = (i <= size-i) ? i : size-i;
1019 memcpy(op->ob_sval+i, op->ob_sval, j);
1020 i += j;
1021 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001022 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001023}
1024
1025/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1026
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001028string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001029 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001030 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001031{
1032 if (i < 0)
1033 i = 0;
1034 if (j < 0)
1035 j = 0; /* Avoid signed/unsigned bug in next line */
1036 if (j > a->ob_size)
1037 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001038 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1039 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001040 Py_INCREF(a);
1041 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001042 }
1043 if (j < i)
1044 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001045 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001046}
1047
Guido van Rossum9284a572000-03-07 15:53:43 +00001048static int
Fred Drakeba096332000-07-09 07:04:36 +00001049string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001050{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001051 char *s = PyString_AS_STRING(a);
1052 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001053 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001054#ifdef USE_FAST
1055 Py_ssize_t pos;
1056#else
1057 char *last;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001058 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001059 char firstchar, lastchar;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001060#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001061
1062 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001063#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001064 if (PyUnicode_Check(el))
1065 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001066#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001067 if (!PyString_Check(el)) {
1068 PyErr_SetString(PyExc_TypeError,
1069 "'in <string>' requires string as left operand");
1070 return -1;
1071 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001072 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001073
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001074 if (len_sub == 0)
1075 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001076
1077#ifdef USE_FAST
1078 pos = fastsearch(
1079 s, PyString_GET_SIZE(a),
1080 sub, len_sub, FAST_SEARCH
1081 );
1082 return (pos != -1);
1083#else
Tim Petersae1d0c92006-03-17 03:29:34 +00001084 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001085 substring. When s<last, there is still room for a possible match
1086 and s[0] through s[len_sub-1] will be in bounds.
1087 shortsub is len_sub minus the last character which is checked
1088 separately just before the memcmp(). That check helps prevent
1089 false starts and saves the setup time for memcmp().
1090 */
1091 firstchar = sub[0];
1092 shortsub = len_sub - 1;
1093 lastchar = sub[shortsub];
1094 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1095 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001096 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001097 if (s == NULL)
1098 return 0;
1099 assert(s < last);
1100 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001101 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001102 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001103 }
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001104#endif
Guido van Rossum9284a572000-03-07 15:53:43 +00001105 return 0;
1106}
1107
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001108static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001109string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001110{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001111 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001112 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001113 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001114 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115 return NULL;
1116 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001117 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001118 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001119 if (v == NULL)
1120 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001121 else {
1122#ifdef COUNT_ALLOCS
1123 one_strings++;
1124#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001125 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001126 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001127 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001128}
1129
Martin v. Löwiscd353062001-05-24 16:56:35 +00001130static PyObject*
1131string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001132{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001133 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001134 Py_ssize_t len_a, len_b;
1135 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001136 PyObject *result;
1137
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001138 /* Make sure both arguments are strings. */
1139 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001140 result = Py_NotImplemented;
1141 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001142 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001143 if (a == b) {
1144 switch (op) {
1145 case Py_EQ:case Py_LE:case Py_GE:
1146 result = Py_True;
1147 goto out;
1148 case Py_NE:case Py_LT:case Py_GT:
1149 result = Py_False;
1150 goto out;
1151 }
1152 }
1153 if (op == Py_EQ) {
1154 /* Supporting Py_NE here as well does not save
1155 much time, since Py_NE is rarely used. */
1156 if (a->ob_size == b->ob_size
1157 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001158 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001159 a->ob_size) == 0)) {
1160 result = Py_True;
1161 } else {
1162 result = Py_False;
1163 }
1164 goto out;
1165 }
1166 len_a = a->ob_size; len_b = b->ob_size;
1167 min_len = (len_a < len_b) ? len_a : len_b;
1168 if (min_len > 0) {
1169 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1170 if (c==0)
1171 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1172 }else
1173 c = 0;
1174 if (c == 0)
1175 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1176 switch (op) {
1177 case Py_LT: c = c < 0; break;
1178 case Py_LE: c = c <= 0; break;
1179 case Py_EQ: assert(0); break; /* unreachable */
1180 case Py_NE: c = c != 0; break;
1181 case Py_GT: c = c > 0; break;
1182 case Py_GE: c = c >= 0; break;
1183 default:
1184 result = Py_NotImplemented;
1185 goto out;
1186 }
1187 result = c ? Py_True : Py_False;
1188 out:
1189 Py_INCREF(result);
1190 return result;
1191}
1192
1193int
1194_PyString_Eq(PyObject *o1, PyObject *o2)
1195{
1196 PyStringObject *a, *b;
1197 a = (PyStringObject*)o1;
1198 b = (PyStringObject*)o2;
1199 return a->ob_size == b->ob_size
1200 && *a->ob_sval == *b->ob_sval
1201 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001202}
1203
Guido van Rossum9bfef441993-03-29 10:43:31 +00001204static long
Fred Drakeba096332000-07-09 07:04:36 +00001205string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001206{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001207 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001208 register unsigned char *p;
1209 register long x;
1210
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001211 if (a->ob_shash != -1)
1212 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001213 len = a->ob_size;
1214 p = (unsigned char *) a->ob_sval;
1215 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001216 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001217 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001218 x ^= a->ob_size;
1219 if (x == -1)
1220 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001221 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001222 return x;
1223}
1224
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001225#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1226
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001227static PyObject*
1228string_subscript(PyStringObject* self, PyObject* item)
1229{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001230 PyNumberMethods *nb = item->ob_type->tp_as_number;
1231 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1232 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001233 if (i == -1 && PyErr_Occurred())
1234 return NULL;
1235 if (i < 0)
1236 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001237 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001238 }
1239 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001240 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001241 char* source_buf;
1242 char* result_buf;
1243 PyObject* result;
1244
Tim Petersae1d0c92006-03-17 03:29:34 +00001245 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001246 PyString_GET_SIZE(self),
1247 &start, &stop, &step, &slicelength) < 0) {
1248 return NULL;
1249 }
1250
1251 if (slicelength <= 0) {
1252 return PyString_FromStringAndSize("", 0);
1253 }
1254 else {
1255 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001256 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001257 if (result_buf == NULL)
1258 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001259
Tim Petersae1d0c92006-03-17 03:29:34 +00001260 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001261 cur += step, i++) {
1262 result_buf[i] = source_buf[cur];
1263 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001264
1265 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001266 slicelength);
1267 PyMem_Free(result_buf);
1268 return result;
1269 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001270 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001271 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001272 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001273 "string indices must be integers");
1274 return NULL;
1275 }
1276}
1277
Martin v. Löwis18e16552006-02-15 17:27:45 +00001278static Py_ssize_t
1279string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001280{
1281 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001282 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001283 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001284 return -1;
1285 }
1286 *ptr = (void *)self->ob_sval;
1287 return self->ob_size;
1288}
1289
Martin v. Löwis18e16552006-02-15 17:27:45 +00001290static Py_ssize_t
1291string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001292{
Guido van Rossum045e6881997-09-08 18:30:11 +00001293 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001294 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001295 return -1;
1296}
1297
Martin v. Löwis18e16552006-02-15 17:27:45 +00001298static Py_ssize_t
1299string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001300{
1301 if ( lenp )
1302 *lenp = self->ob_size;
1303 return 1;
1304}
1305
Martin v. Löwis18e16552006-02-15 17:27:45 +00001306static Py_ssize_t
1307string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001308{
1309 if ( index != 0 ) {
1310 PyErr_SetString(PyExc_SystemError,
1311 "accessing non-existent string segment");
1312 return -1;
1313 }
1314 *ptr = self->ob_sval;
1315 return self->ob_size;
1316}
1317
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001318static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001319 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001320 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001321 (ssizeargfunc)string_repeat, /*sq_repeat*/
1322 (ssizeargfunc)string_item, /*sq_item*/
1323 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001324 0, /*sq_ass_item*/
1325 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001326 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001327};
1328
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001329static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001330 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001331 (binaryfunc)string_subscript,
1332 0,
1333};
1334
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001335static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001336 (readbufferproc)string_buffer_getreadbuf,
1337 (writebufferproc)string_buffer_getwritebuf,
1338 (segcountproc)string_buffer_getsegcount,
1339 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001340};
1341
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342
1343
/* Strip modes.  Each value is also the index of the matching entry in
   stripformat[]. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* The entry for mode i with its first three characters ("|O:") skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001352
Andrew Dalke525eab32006-05-26 14:00:45 +00001353
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements.  The argument is parenthesized so that a
   compound expression (e.g. a conditional) expands as a single operand. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1366
/* Append the substring data[left:right] to `list' as a new string object.
   Relies on the variables `str' and `list' and on the label `onError' of
   the enclosing function; jumps to onError if the string cannot be created
   or appended.  The body is wrapped in braces so the macro expands to a
   single statement. */
#define SPLIT_APPEND(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),		\
					 (right) - (left));		\
	if (str == NULL)						\
		goto onError;						\
	if (PyList_Append(list, str)) {					\
		Py_DECREF(str);						\
		goto onError;						\
	}								\
	else								\
		Py_DECREF(str); }
1378
/* Add the substring data[left:right] to `list' as item number `count',
   then increment `count'.  The first MAX_PREALLOC items are stored
   directly into the list's existing slots; later ones are appended.
   Relies on the variables `str', `list' and `count' and on the label
   `onError' of the enclosing function. */
#define SPLIT_ADD(data, left, right) {					\
	str = PyString_FromStringAndSize((data) + (left),		\
					 (right) - (left));		\
	if (str == NULL)						\
		goto onError;						\
	if (count >= MAX_PREALLOC) {					\
		if (PyList_Append(list, str)) {				\
			Py_DECREF(str);					\
			goto onError;					\
		}							\
		Py_DECREF(str);						\
	}								\
	else {								\
		PyList_SET_ITEM(list, count, str);			\
	}								\
	count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001395
/* Always force the list to the expected size.  Uses the variable `count'
   of the enclosing function.  (The trailing semicolon is part of the
   macro.) */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count;

/* Scanning helpers: move the index `i' forwards (SKIP_*) or backwards
   (RSKIP_*) over `s' while the current character is whitespace / is not
   whitespace.  `i' must be a modifiable variable.  Arguments are
   parenthesized so that expression arguments expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1403
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001404Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001405split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406{
Andrew Dalke525eab32006-05-26 14:00:45 +00001407 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001408 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001409 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001410
1411 if (list == NULL)
1412 return NULL;
1413
Andrew Dalke02758d62006-05-26 15:21:01 +00001414 i = j = 0;
1415
1416 while (maxsplit-- > 0) {
1417 SKIP_SPACE(s, i, len);
1418 if (i==len) break;
1419 j = i; i++;
1420 SKIP_NONSPACE(s, i, len);
1421 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001423
1424 if (i < len) {
1425 /* Only occurs when maxsplit was reached */
1426 /* Skip any remaining whitespace and copy to end of string */
1427 SKIP_SPACE(s, i, len);
1428 if (i != len)
1429 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001430 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001431 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001433 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001434 Py_DECREF(list);
1435 return NULL;
1436}
1437
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001438Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001439split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001440{
Andrew Dalke525eab32006-05-26 14:00:45 +00001441 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001443 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001444
1445 if (list == NULL)
1446 return NULL;
1447
1448 for (i = j = 0; i < len; ) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001449 /* TODO: Use findchar/memchr for this? */
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001450 if (s[i] == ch) {
1451 if (maxcount-- <= 0)
1452 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001453 SPLIT_ADD(s, j, i);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001454 i = j = i + 1;
1455 } else
1456 i++;
1457 }
1458 if (j <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001459 SPLIT_ADD(s, j, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001460 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001461 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001462 return list;
1463
1464 onError:
1465 Py_DECREF(list);
1466 return NULL;
1467}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001469PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470"S.split([sep [,maxsplit]]) -> list of strings\n\
1471\n\
1472Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001473delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001474splits are done. If sep is not specified or is None, any\n\
1475whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001476
1477static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001478string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001480 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001481 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001482 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001483 PyObject *list, *str, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484
Martin v. Löwis9c830762006-04-13 08:37:17 +00001485 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001486 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001487 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001488 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001489 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001491 if (PyString_Check(subobj)) {
1492 sub = PyString_AS_STRING(subobj);
1493 n = PyString_GET_SIZE(subobj);
1494 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001495#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001496 else if (PyUnicode_Check(subobj))
1497 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001498#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1500 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001501
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001502 if (n == 0) {
1503 PyErr_SetString(PyExc_ValueError, "empty separator");
1504 return NULL;
1505 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001506 else if (n == 1)
1507 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001508
Andrew Dalke525eab32006-05-26 14:00:45 +00001509 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001510 if (list == NULL)
1511 return NULL;
1512
1513 i = j = 0;
1514 while (i+n <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001515 /* TODO: Use Py_STRING_MATCH */
Fred Drake396f6e02000-06-20 15:47:54 +00001516 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001517 if (maxsplit-- <= 0)
1518 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001519 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001521 }
1522 else
1523 i++;
1524 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001525 SPLIT_ADD(s, j, len);
1526 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527 return list;
1528
Andrew Dalke525eab32006-05-26 14:00:45 +00001529 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001530 Py_DECREF(list);
1531 return NULL;
1532}
1533
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001534PyDoc_STRVAR(partition__doc__,
1535"S.partition(sep) -> (head, sep, tail)\n\
1536\n\
1537Searches for the separator sep in S, and returns the part before it,\n\
1538the separator itself, and the part after it. If the separator is not\n\
1539found, returns S and two empty strings.");
1540
1541static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001542string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001543{
1544 Py_ssize_t len = PyString_GET_SIZE(self), sep_len, pos;
1545 const char *str = PyString_AS_STRING(self), *sep;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001546 PyObject * out;
1547
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001548 if (PyString_Check(sep_obj)) {
1549 sep = PyString_AS_STRING(sep_obj);
1550 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001551 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001552#ifdef Py_USING_UNICODE
1553 else if (PyUnicode_Check(sep_obj))
1554 return PyUnicode_Partition((PyObject *)self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001555#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001556 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001557 return NULL;
1558
1559 if (sep_len == 0) {
1560 PyErr_SetString(PyExc_ValueError, "empty separator");
1561 return NULL;
1562 }
1563
1564 out = PyTuple_New(3);
1565 if (!out)
1566 return NULL;
1567
1568 pos = fastsearch(str, len, sep, sep_len, FAST_SEARCH);
1569 if (pos < 0) {
1570 Py_INCREF(self);
1571 PyTuple_SET_ITEM(out, 0, (PyObject*) self);
1572 Py_INCREF(nullstring);
1573 PyTuple_SET_ITEM(out, 1, (PyObject*) nullstring);
1574 Py_INCREF(nullstring);
1575 PyTuple_SET_ITEM(out, 2, (PyObject*) nullstring);
1576 } else {
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001577 PyObject* obj;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001578 PyTuple_SET_ITEM(out, 0, PyString_FromStringAndSize(str, pos));
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001579 Py_INCREF(sep_obj);
1580 PyTuple_SET_ITEM(out, 1, sep_obj);
1581 pos += sep_len;
1582 obj = PyString_FromStringAndSize(str + pos, len - pos);
1583 PyTuple_SET_ITEM(out, 2, obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001584 if (PyErr_Occurred()) {
1585 Py_DECREF(out);
1586 return NULL;
1587 }
1588 }
1589
1590 return out;
1591}
1592
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001593Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001594rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001595{
Andrew Dalke525eab32006-05-26 14:00:45 +00001596 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001597 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001598 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001599
1600 if (list == NULL)
1601 return NULL;
1602
Andrew Dalke02758d62006-05-26 15:21:01 +00001603 i = j = len-1;
1604
1605 while (maxsplit-- > 0) {
1606 RSKIP_SPACE(s, i);
1607 if (i<0) break;
1608 j = i; i--;
1609 RSKIP_NONSPACE(s, i);
1610 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001611 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001612 if (i >= 0) {
1613 /* Only occurs when maxsplit was reached */
1614 /* Skip any remaining whitespace and copy to beginning of string */
1615 RSKIP_SPACE(s, i);
1616 if (i >= 0)
1617 SPLIT_ADD(s, 0, i + 1);
1618
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001619 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001620 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001621 if (PyList_Reverse(list) < 0)
1622 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001623 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001624 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001625 Py_DECREF(list);
1626 return NULL;
1627}
1628
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001629Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001630rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631{
Andrew Dalke525eab32006-05-26 14:00:45 +00001632 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001634 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001635
1636 if (list == NULL)
1637 return NULL;
1638
1639 for (i = j = len - 1; i >= 0; ) {
1640 if (s[i] == ch) {
1641 if (maxcount-- <= 0)
1642 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001643 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001644 j = i = i - 1;
1645 } else
1646 i--;
1647 }
1648 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001649 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001650 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001651 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001652 if (PyList_Reverse(list) < 0)
1653 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001654 return list;
1655
1656 onError:
1657 Py_DECREF(list);
1658 return NULL;
1659}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001660
1661PyDoc_STRVAR(rsplit__doc__,
1662"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1663\n\
1664Return a list of the words in the string S, using sep as the\n\
1665delimiter string, starting at the end of the string and working\n\
1666to the front. If maxsplit is given, at most maxsplit splits are\n\
1667done. If sep is not specified or is None, any whitespace string\n\
1668is a separator.");
1669
1670static PyObject *
1671string_rsplit(PyStringObject *self, PyObject *args)
1672{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001673 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001674 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001675 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001676 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001677
Martin v. Löwis9c830762006-04-13 08:37:17 +00001678 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679 return NULL;
1680 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001681 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001682 if (subobj == Py_None)
1683 return rsplit_whitespace(s, len, maxsplit);
1684 if (PyString_Check(subobj)) {
1685 sub = PyString_AS_STRING(subobj);
1686 n = PyString_GET_SIZE(subobj);
1687 }
1688#ifdef Py_USING_UNICODE
1689 else if (PyUnicode_Check(subobj))
1690 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1691#endif
1692 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1693 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001694
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001695 if (n == 0) {
1696 PyErr_SetString(PyExc_ValueError, "empty separator");
1697 return NULL;
1698 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001699 else if (n == 1)
1700 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001701
Andrew Dalke525eab32006-05-26 14:00:45 +00001702 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001703 if (list == NULL)
1704 return NULL;
1705
1706 j = len;
1707 i = j - n;
1708 while (i >= 0) {
1709 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1710 if (maxsplit-- <= 0)
1711 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001712 SPLIT_ADD(s, i+n, j);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001713 j = i;
1714 i -= n;
1715 }
1716 else
1717 i--;
1718 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001719 SPLIT_ADD(s, 0, j);
1720 FIX_PREALLOC_SIZE(list);
1721 if (PyList_Reverse(list) < 0)
1722 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001723 return list;
1724
Andrew Dalke525eab32006-05-26 14:00:45 +00001725onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001726 Py_DECREF(list);
1727 return NULL;
1728}
1729
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001731PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001732"S.join(sequence) -> string\n\
1733\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001734Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001735sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736
1737static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001738string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001739{
1740 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001741 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001744 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001745 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001746 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001747 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748
Tim Peters19fe14e2001-01-19 03:03:47 +00001749 seq = PySequence_Fast(orig, "");
1750 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001751 return NULL;
1752 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001753
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001754 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001755 if (seqlen == 0) {
1756 Py_DECREF(seq);
1757 return PyString_FromString("");
1758 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001760 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001761 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1762 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001763 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001764 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001765 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001767
Raymond Hettinger674f2412004-08-23 23:23:54 +00001768 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001769 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001770 * Do a pre-pass to figure out the total amount of space we'll
1771 * need (sz), see whether any argument is absurd, and defer to
1772 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001773 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001774 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001775 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001776 item = PySequence_Fast_GET_ITEM(seq, i);
1777 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001778#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001779 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001780 /* Defer to Unicode join.
1781 * CAUTION: There's no gurantee that the
1782 * original sequence can be iterated over
1783 * again, so we must pass seq here.
1784 */
1785 PyObject *result;
1786 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001787 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001788 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001789 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001790#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001791 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001792 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001793 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001794 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001795 Py_DECREF(seq);
1796 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001797 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001798 sz += PyString_GET_SIZE(item);
1799 if (i != 0)
1800 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001801 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001802 PyErr_SetString(PyExc_OverflowError,
1803 "join() is too long for a Python string");
1804 Py_DECREF(seq);
1805 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001807 }
1808
1809 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001810 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001811 if (res == NULL) {
1812 Py_DECREF(seq);
1813 return NULL;
1814 }
1815
1816 /* Catenate everything. */
1817 p = PyString_AS_STRING(res);
1818 for (i = 0; i < seqlen; ++i) {
1819 size_t n;
1820 item = PySequence_Fast_GET_ITEM(seq, i);
1821 n = PyString_GET_SIZE(item);
1822 memcpy(p, PyString_AS_STRING(item), n);
1823 p += n;
1824 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001825 memcpy(p, sep, seplen);
1826 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001827 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001829
Jeremy Hylton49048292000-07-11 03:28:17 +00001830 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832}
1833
Tim Peters52e155e2001-06-16 05:42:57 +00001834PyObject *
1835_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001836{
Tim Petersa7259592001-06-16 05:11:17 +00001837 assert(sep != NULL && PyString_Check(sep));
1838 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001839 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001840}
1841
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001842Py_LOCAL(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001843string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001844{
1845 if (*end > len)
1846 *end = len;
1847 else if (*end < 0)
1848 *end += len;
1849 if (*end < 0)
1850 *end = 0;
1851 if (*start < 0)
1852 *start += len;
1853 if (*start < 0)
1854 *start = 0;
1855}
1856
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001857Py_LOCAL(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001858string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001859{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001860 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001861 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001862 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001863 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864
Martin v. Löwis18e16552006-02-15 17:27:45 +00001865 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001866 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001867 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868 return -2;
1869 if (PyString_Check(subobj)) {
1870 sub = PyString_AS_STRING(subobj);
1871 n = PyString_GET_SIZE(subobj);
1872 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001873#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001875 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001876#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001877 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878 return -2;
1879
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001880 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001881
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001882#ifdef USE_FAST
1883 if (n == 0)
1884 return (dir > 0) ? i : last;
1885 if (dir > 0) {
1886 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1887 FAST_SEARCH);
1888 if (pos < 0)
1889 return pos;
1890 return pos + i;
1891 }
1892#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001893 if (dir > 0) {
1894 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001896 last -= n;
1897 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001898 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001899 return (long)i;
1900 }
1901 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001902 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001903
Guido van Rossum4c08d552000-03-10 22:55:18 +00001904 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001905 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001906 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001907 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001908 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001909 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001910
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911 return -1;
1912}
1913
1914
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001915PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916"S.find(sub [,start [,end]]) -> int\n\
1917\n\
1918Return the lowest index in S where substring sub is found,\n\
1919such that sub is contained within s[start,end]. Optional\n\
1920arguments start and end are interpreted as in slice notation.\n\
1921\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001922Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923
1924static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001925string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001927 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928 if (result == -2)
1929 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001930 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931}
1932
1933
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001934PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935"S.index(sub [,start [,end]]) -> int\n\
1936\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001937Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938
1939static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001940string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001942 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943 if (result == -2)
1944 return NULL;
1945 if (result == -1) {
1946 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001947 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948 return NULL;
1949 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001950 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951}
1952
1953
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001954PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955"S.rfind(sub [,start [,end]]) -> int\n\
1956\n\
1957Return the highest index in S where substring sub is found,\n\
1958such that sub is contained within s[start,end]. Optional\n\
1959arguments start and end are interpreted as in slice notation.\n\
1960\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001961Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962
1963static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001964string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001966 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967 if (result == -2)
1968 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001969 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970}
1971
1972
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001973PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974"S.rindex(sub [,start [,end]]) -> int\n\
1975\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001976Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977
1978static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001979string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001981 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982 if (result == -2)
1983 return NULL;
1984 if (result == -1) {
1985 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001986 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987 return NULL;
1988 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001989 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990}
1991
1992
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001993Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001994do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1995{
1996 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001997 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001998 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001999 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2000 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002001
2002 i = 0;
2003 if (striptype != RIGHTSTRIP) {
2004 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2005 i++;
2006 }
2007 }
2008
2009 j = len;
2010 if (striptype != LEFTSTRIP) {
2011 do {
2012 j--;
2013 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2014 j++;
2015 }
2016
2017 if (i == 0 && j == len && PyString_CheckExact(self)) {
2018 Py_INCREF(self);
2019 return (PyObject*)self;
2020 }
2021 else
2022 return PyString_FromStringAndSize(s+i, j-i);
2023}
2024
2025
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002026Py_LOCAL(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002027do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028{
2029 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002030 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032 i = 0;
2033 if (striptype != RIGHTSTRIP) {
2034 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2035 i++;
2036 }
2037 }
2038
2039 j = len;
2040 if (striptype != LEFTSTRIP) {
2041 do {
2042 j--;
2043 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2044 j++;
2045 }
2046
Tim Peters8fa5dd02001-09-12 02:18:30 +00002047 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048 Py_INCREF(self);
2049 return (PyObject*)self;
2050 }
2051 else
2052 return PyString_FromStringAndSize(s+i, j-i);
2053}
2054
2055
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002056Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002057do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2058{
2059 PyObject *sep = NULL;
2060
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002061 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002062 return NULL;
2063
2064 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002065 if (PyString_Check(sep))
2066 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002067#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002068 else if (PyUnicode_Check(sep)) {
2069 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2070 PyObject *res;
2071 if (uniself==NULL)
2072 return NULL;
2073 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2074 striptype, sep);
2075 Py_DECREF(uniself);
2076 return res;
2077 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002078#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002079 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002080#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002081 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002082#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002083 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002084#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002085 STRIPNAME(striptype));
2086 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002087 }
2088
2089 return do_strip(self, striptype);
2090}
2091
2092
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002093PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002094"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095\n\
2096Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002097whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002098If chars is given and not None, remove characters in chars instead.\n\
2099If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100
2101static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002102string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002104 if (PyTuple_GET_SIZE(args) == 0)
2105 return do_strip(self, BOTHSTRIP); /* Common case */
2106 else
2107 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108}
2109
2110
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002111PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002112"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002114Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002115If chars is given and not None, remove characters in chars instead.\n\
2116If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117
2118static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002119string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002121 if (PyTuple_GET_SIZE(args) == 0)
2122 return do_strip(self, LEFTSTRIP); /* Common case */
2123 else
2124 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125}
2126
2127
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002128PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002129"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002131Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002132If chars is given and not None, remove characters in chars instead.\n\
2133If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134
2135static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002136string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002138 if (PyTuple_GET_SIZE(args) == 0)
2139 return do_strip(self, RIGHTSTRIP); /* Common case */
2140 else
2141 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142}
2143
2144
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002145PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146"S.lower() -> string\n\
2147\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002148Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002150/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2151#ifndef _tolower
2152#define _tolower tolower
2153#endif
2154
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002156string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002158 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002159 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002160 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002162 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002163 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002165
2166 s = PyString_AS_STRING(newobj);
2167
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002168 memcpy(s, PyString_AS_STRING(self), n);
2169
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002171 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002172 if (isupper(c))
2173 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002175
Anthony Baxtera6286212006-04-11 07:42:36 +00002176 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177}
2178
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002179PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180"S.upper() -> string\n\
2181\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002182Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002184#ifndef _toupper
2185#define _toupper toupper
2186#endif
2187
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002189string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002191 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002192 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002193 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002195 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002196 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002198
2199 s = PyString_AS_STRING(newobj);
2200
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002201 memcpy(s, PyString_AS_STRING(self), n);
2202
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002204 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002205 if (islower(c))
2206 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002208
Anthony Baxtera6286212006-04-11 07:42:36 +00002209 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210}
2211
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002212PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002213"S.title() -> string\n\
2214\n\
2215Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002216characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002217
2218static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002219string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002220{
2221 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002222 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002223 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002224 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002225
Anthony Baxtera6286212006-04-11 07:42:36 +00002226 newobj = PyString_FromStringAndSize(NULL, n);
2227 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002229 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002230 for (i = 0; i < n; i++) {
2231 int c = Py_CHARMASK(*s++);
2232 if (islower(c)) {
2233 if (!previous_is_cased)
2234 c = toupper(c);
2235 previous_is_cased = 1;
2236 } else if (isupper(c)) {
2237 if (previous_is_cased)
2238 c = tolower(c);
2239 previous_is_cased = 1;
2240 } else
2241 previous_is_cased = 0;
2242 *s_new++ = c;
2243 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002244 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002245}
2246
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002247PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248"S.capitalize() -> string\n\
2249\n\
2250Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002251capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252
2253static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002254string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255{
2256 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002257 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002258 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259
Anthony Baxtera6286212006-04-11 07:42:36 +00002260 newobj = PyString_FromStringAndSize(NULL, n);
2261 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002263 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264 if (0 < n) {
2265 int c = Py_CHARMASK(*s++);
2266 if (islower(c))
2267 *s_new = toupper(c);
2268 else
2269 *s_new = c;
2270 s_new++;
2271 }
2272 for (i = 1; i < n; i++) {
2273 int c = Py_CHARMASK(*s++);
2274 if (isupper(c))
2275 *s_new = tolower(c);
2276 else
2277 *s_new = c;
2278 s_new++;
2279 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002280 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281}
2282
2283
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002284PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002285"S.count(sub[, start[, end]]) -> int\n\
2286\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002287Return the number of non-overlapping occurrences of substring sub in\n\
2288string S[start:end]. Optional arguments start and end are interpreted\n\
2289as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290
2291static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002292string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002293{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002294 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002295 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002296 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002297 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002298 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299
Guido van Rossumc6821402000-05-08 14:08:05 +00002300 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2301 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002303
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 if (PyString_Check(subobj)) {
2305 sub = PyString_AS_STRING(subobj);
2306 n = PyString_GET_SIZE(subobj);
2307 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002308#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002309 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002310 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002311 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2312 if (count == -1)
2313 return NULL;
2314 else
2315 return PyInt_FromLong((long) count);
2316 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002317#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002318 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2319 return NULL;
2320
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002321 string_adjust_indices(&i, &last, len);
2322
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323 m = last + 1 - n;
2324 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002325 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326
Fredrik Lundhaf722372006-05-25 17:55:31 +00002327#ifdef USE_FAST
2328 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2329 if (r < 0)
2330 r = 0; /* no match */
2331#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332 r = 0;
2333 while (i < m) {
Fredrik Lundha50d2012006-05-26 17:04:58 +00002334 const char *t;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335 if (!memcmp(s+i, sub, n)) {
2336 r++;
2337 i += n;
2338 } else {
2339 i++;
2340 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002341 if (i >= m)
2342 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002343 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002344 if (t == NULL)
2345 break;
2346 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002348#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002349 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350}
2351
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002352PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353"S.swapcase() -> string\n\
2354\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002355Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002356converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357
2358static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002359string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360{
2361 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002362 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002363 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364
Anthony Baxtera6286212006-04-11 07:42:36 +00002365 newobj = PyString_FromStringAndSize(NULL, n);
2366 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002367 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002368 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369 for (i = 0; i < n; i++) {
2370 int c = Py_CHARMASK(*s++);
2371 if (islower(c)) {
2372 *s_new = toupper(c);
2373 }
2374 else if (isupper(c)) {
2375 *s_new = tolower(c);
2376 }
2377 else
2378 *s_new = c;
2379 s_new++;
2380 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002381 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382}
2383
2384
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002385PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002386"S.translate(table [,deletechars]) -> string\n\
2387\n\
2388Return a copy of the string S, where all characters occurring\n\
2389in the optional argument deletechars are removed, and the\n\
2390remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002391translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392
2393static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002394string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002395{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002396 register char *input, *output;
2397 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002398 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002399 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002401 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002402 PyObject *result;
2403 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002405
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002406 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002407 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002409
2410 if (PyString_Check(tableobj)) {
2411 table1 = PyString_AS_STRING(tableobj);
2412 tablen = PyString_GET_SIZE(tableobj);
2413 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002414#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002415 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002416 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002417 parameter; instead a mapping to None will cause characters
2418 to be deleted. */
2419 if (delobj != NULL) {
2420 PyErr_SetString(PyExc_TypeError,
2421 "deletions are implemented differently for unicode");
2422 return NULL;
2423 }
2424 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2425 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002426#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429
Martin v. Löwis00b61272002-12-12 20:03:19 +00002430 if (tablen != 256) {
2431 PyErr_SetString(PyExc_ValueError,
2432 "translation table must be 256 characters long");
2433 return NULL;
2434 }
2435
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436 if (delobj != NULL) {
2437 if (PyString_Check(delobj)) {
2438 del_table = PyString_AS_STRING(delobj);
2439 dellen = PyString_GET_SIZE(delobj);
2440 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002441#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 else if (PyUnicode_Check(delobj)) {
2443 PyErr_SetString(PyExc_TypeError,
2444 "deletions are implemented differently for unicode");
2445 return NULL;
2446 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002447#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2449 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002450 }
2451 else {
2452 del_table = NULL;
2453 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002454 }
2455
2456 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002457 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002458 result = PyString_FromStringAndSize((char *)NULL, inlen);
2459 if (result == NULL)
2460 return NULL;
2461 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002462 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463
2464 if (dellen == 0) {
2465 /* If no deletions are required, use faster code */
2466 for (i = inlen; --i >= 0; ) {
2467 c = Py_CHARMASK(*input++);
2468 if (Py_CHARMASK((*output++ = table[c])) != c)
2469 changed = 1;
2470 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002471 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472 return result;
2473 Py_DECREF(result);
2474 Py_INCREF(input_obj);
2475 return input_obj;
2476 }
2477
2478 for (i = 0; i < 256; i++)
2479 trans_table[i] = Py_CHARMASK(table[i]);
2480
2481 for (i = 0; i < dellen; i++)
2482 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2483
2484 for (i = inlen; --i >= 0; ) {
2485 c = Py_CHARMASK(*input++);
2486 if (trans_table[c] != -1)
2487 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2488 continue;
2489 changed = 1;
2490 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002491 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492 Py_DECREF(result);
2493 Py_INCREF(input_obj);
2494 return input_obj;
2495 }
2496 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002497 if (inlen > 0)
2498 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002499 return result;
2500}
2501
2502
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Py_STRING_MATCH(target, offset, pattern, length)
   True when the `length` bytes of `pattern` occur in `target` starting at
   `offset`.  The first and last bytes are compared directly and only the
   interior is handed to memcmp().
   Don't call if length < 2: the interior length (length-2) would be
   negative.
   Every parameter is parenthesized so that expression arguments (for
   example `a ? 2 : 9`) are evaluated as a unit; note that the arguments
   are still evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* findchar(target, target_len, c)
   Pointer to the first occurrence of byte `c` within the first
   `target_len` bytes of `target`, or NULL when there is none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2516
2517/* String ops must return a string. */
2518/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002519Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002520return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002521{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002522 if (PyString_CheckExact(self)) {
2523 Py_INCREF(self);
2524 return self;
2525 }
2526 return (PyStringObject *)PyString_FromStringAndSize(
2527 PyString_AS_STRING(self),
2528 PyString_GET_SIZE(self));
2529}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002530
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002531Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002532countchar(char *target, int target_len, char c)
2533{
2534 Py_ssize_t count=0;
2535 char *start=target;
2536 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002538 while ( (start=findchar(start, end-start, c)) != NULL ) {
2539 count++;
2540 start += 1;
2541 }
2542
2543 return count;
2544}
2545
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002546Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002547findstring(char *target, Py_ssize_t target_len,
2548 char *pattern, Py_ssize_t pattern_len,
2549 Py_ssize_t start,
2550 Py_ssize_t end,
2551 int direction)
2552{
2553 if (start < 0) {
2554 start += target_len;
2555 if (start < 0)
2556 start = 0;
2557 }
2558 if (end > target_len) {
2559 end = target_len;
2560 } else if (end < 0) {
2561 end += target_len;
2562 if (end < 0)
2563 end = 0;
2564 }
2565
2566 /* zero-length substrings always match at the first attempt */
2567 if (pattern_len == 0)
2568 return (direction > 0) ? start : end;
2569
2570 end -= pattern_len;
2571
2572 if (direction < 0) {
2573 for (; end >= start; end--)
2574 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2575 return end;
2576 } else {
2577 for (; start <= end; start++)
2578 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2579 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002580 }
2581 return -1;
2582}
2583
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002584Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002585countstring(char *target, Py_ssize_t target_len,
2586 char *pattern, Py_ssize_t pattern_len,
2587 Py_ssize_t start,
2588 Py_ssize_t end,
2589 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002590{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002592
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002593 if (start < 0) {
2594 start += target_len;
2595 if (start < 0)
2596 start = 0;
2597 }
2598 if (end > target_len) {
2599 end = target_len;
2600 } else if (end < 0) {
2601 end += target_len;
2602 if (end < 0)
2603 end = 0;
2604 }
2605
2606 /* zero-length substrings match everywhere */
2607 if (pattern_len == 0)
2608 return target_len+1;
2609
2610 end -= pattern_len;
2611
2612 if (direction < 0) {
2613 for (; end >= start; end--)
2614 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2615 count++;
2616 end -= pattern_len-1;
2617 }
2618 } else {
2619 for (; start <= end; start++)
2620 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2621 count++;
2622 start += pattern_len-1;
2623 }
2624 }
2625 return count;
2626}
2627
2628
2629/* Algorithms for difference cases of string replacement */
2630
2631/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002632Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002633replace_interleave(PyStringObject *self,
2634 PyStringObject *to,
2635 Py_ssize_t maxcount)
2636{
2637 char *self_s, *to_s, *result_s;
2638 Py_ssize_t self_len, to_len, result_len;
2639 Py_ssize_t count, i, product;
2640 PyStringObject *result;
2641
2642 self_len = PyString_GET_SIZE(self);
2643 to_len = PyString_GET_SIZE(to);
2644
2645 /* 1 at the end plus 1 after every character */
2646 count = self_len+1;
2647 if (maxcount < count)
2648 count = maxcount;
2649
2650 /* Check for overflow */
2651 /* result_len = count * to_len + self_len; */
2652 product = count * to_len;
2653 if (product / to_len != count) {
2654 PyErr_SetString(PyExc_OverflowError,
2655 "replace string is too long");
2656 return NULL;
2657 }
2658 result_len = product + self_len;
2659 if (result_len < 0) {
2660 PyErr_SetString(PyExc_OverflowError,
2661 "replace string is too long");
2662 return NULL;
2663 }
2664
2665 if (! (result = (PyStringObject *)
2666 PyString_FromStringAndSize(NULL, result_len)) )
2667 return NULL;
2668
2669 self_s = PyString_AS_STRING(self);
2670 to_s = PyString_AS_STRING(to);
2671 to_len = PyString_GET_SIZE(to);
2672 result_s = PyString_AS_STRING(result);
2673
2674 /* TODO: special case single character, which doesn't need memcpy */
2675
2676 /* Lay the first one down (guaranteed this will occur) */
2677 memcpy(result_s, to_s, to_len);
2678 result_s += to_len;
2679 count -= 1;
2680
2681 for (i=0; i<count; i++) {
2682 *result_s++ = *self_s++;
2683 memcpy(result_s, to_s, to_len);
2684 result_s += to_len;
2685 }
2686
2687 /* Copy the rest of the original string */
2688 memcpy(result_s, self_s, self_len-i);
2689
2690 return result;
2691}
2692
2693/* Special case for deleting a single character */
2694/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002695Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002696replace_delete_single_character(PyStringObject *self,
2697 char from_c, Py_ssize_t maxcount)
2698{
2699 char *self_s, *result_s;
2700 char *start, *next, *end;
2701 Py_ssize_t self_len, result_len;
2702 Py_ssize_t count;
2703 PyStringObject *result;
2704
2705 self_len = PyString_GET_SIZE(self);
2706 self_s = PyString_AS_STRING(self);
2707
2708 count = countchar(self_s, self_len, from_c);
2709 if (count == 0) {
2710 return return_self(self);
2711 }
2712 if (count > maxcount)
2713 count = maxcount;
2714
2715 result_len = self_len - count; /* from_len == 1 */
2716 assert(result_len>=0);
2717
2718 if ( (result = (PyStringObject *)
2719 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2720 return NULL;
2721 result_s = PyString_AS_STRING(result);
2722
2723 start = self_s;
2724 end = self_s + self_len;
2725 while (count-- > 0) {
2726 next = findchar(start, end-start, from_c);
2727 if (next == NULL)
2728 break;
2729 memcpy(result_s, start, next-start);
2730 result_s += (next-start);
2731 start = next+1;
2732 }
2733 memcpy(result_s, start, end-start);
2734
2735 return result;
2736}
2737
2738/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2739
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002740Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002741replace_delete_substring(PyStringObject *self, PyStringObject *from,
2742 Py_ssize_t maxcount) {
2743 char *self_s, *from_s, *result_s;
2744 char *start, *next, *end;
2745 Py_ssize_t self_len, from_len, result_len;
2746 Py_ssize_t count, offset;
2747 PyStringObject *result;
2748
2749 self_len = PyString_GET_SIZE(self);
2750 self_s = PyString_AS_STRING(self);
2751 from_len = PyString_GET_SIZE(from);
2752 from_s = PyString_AS_STRING(from);
2753
2754 count = countstring(self_s, self_len,
2755 from_s, from_len,
2756 0, self_len, 1);
2757
2758 if (count > maxcount)
2759 count = maxcount;
2760
2761 if (count == 0) {
2762 /* no matches */
2763 return return_self(self);
2764 }
2765
2766 result_len = self_len - (count * from_len);
2767 assert (result_len>=0);
2768
2769 if ( (result = (PyStringObject *)
2770 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2771 return NULL;
2772
2773 result_s = PyString_AS_STRING(result);
2774
2775 start = self_s;
2776 end = self_s + self_len;
2777 while (count-- > 0) {
2778 offset = findstring(start, end-start,
2779 from_s, from_len,
2780 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002781 if (offset == -1)
2782 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783 next = start + offset;
2784
2785 memcpy(result_s, start, next-start);
2786
2787 result_s += (next-start);
2788 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002789 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002790 memcpy(result_s, start, end-start);
2791 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002792}
2793
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002794/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002795Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002796replace_single_character_in_place(PyStringObject *self,
2797 char from_c, char to_c,
2798 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002799{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800 char *self_s, *result_s, *start, *end, *next;
2801 Py_ssize_t self_len;
2802 PyStringObject *result;
2803
2804 /* The result string will be the same size */
2805 self_s = PyString_AS_STRING(self);
2806 self_len = PyString_GET_SIZE(self);
2807
2808 next = findchar(self_s, self_len, from_c);
2809
2810 if (next == NULL) {
2811 /* No matches; return the original string */
2812 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002813 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814
2815 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002816 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002817 if (result == NULL)
2818 return NULL;
2819 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002820 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002821
2822 /* change everything in-place, starting with this one */
2823 start = result_s + (next-self_s);
2824 *start = to_c;
2825 start++;
2826 end = result_s + self_len;
2827
2828 while (--maxcount > 0) {
2829 next = findchar(start, end-start, from_c);
2830 if (next == NULL)
2831 break;
2832 *next = to_c;
2833 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002834 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002835
2836 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002837}
2838
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002839/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002840Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002841replace_substring_in_place(PyStringObject *self,
2842 PyStringObject *from,
2843 PyStringObject *to,
2844 Py_ssize_t maxcount)
2845{
2846 char *result_s, *start, *end;
2847 char *self_s, *from_s, *to_s;
2848 Py_ssize_t self_len, from_len, offset;
2849 PyStringObject *result;
2850
2851 /* The result string will be the same size */
2852
2853 self_s = PyString_AS_STRING(self);
2854 self_len = PyString_GET_SIZE(self);
2855
2856 from_s = PyString_AS_STRING(from);
2857 from_len = PyString_GET_SIZE(from);
2858 to_s = PyString_AS_STRING(to);
2859
2860 offset = findstring(self_s, self_len,
2861 from_s, from_len,
2862 0, self_len, FORWARD);
2863
2864 if (offset == -1) {
2865 /* No matches; return the original string */
2866 return return_self(self);
2867 }
2868
2869 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002870 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002871 if (result == NULL)
2872 return NULL;
2873 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002874 memcpy(result_s, self_s, self_len);
2875
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002876
2877 /* change everything in-place, starting with this one */
2878 start = result_s + offset;
2879 memcpy(start, to_s, from_len);
2880 start += from_len;
2881 end = result_s + self_len;
2882
2883 while ( --maxcount > 0) {
2884 offset = findstring(start, end-start,
2885 from_s, from_len,
2886 0, end-start, FORWARD);
2887 if (offset==-1)
2888 break;
2889 memcpy(start+offset, to_s, from_len);
2890 start += offset+from_len;
2891 }
2892
2893 return result;
2894}
2895
2896/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002897Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898replace_single_character(PyStringObject *self,
2899 char from_c,
2900 PyStringObject *to,
2901 Py_ssize_t maxcount)
2902{
2903 char *self_s, *to_s, *result_s;
2904 char *start, *next, *end;
2905 Py_ssize_t self_len, to_len, result_len;
2906 Py_ssize_t count, product;
2907 PyStringObject *result;
2908
2909 self_s = PyString_AS_STRING(self);
2910 self_len = PyString_GET_SIZE(self);
2911
2912 count = countchar(self_s, self_len, from_c);
2913 if (count > maxcount)
2914 count = maxcount;
2915
2916 if (count == 0) {
2917 /* no matches, return unchanged */
2918 return return_self(self);
2919 }
2920
2921 to_s = PyString_AS_STRING(to);
2922 to_len = PyString_GET_SIZE(to);
2923
2924 /* use the difference between current and new, hence the "-1" */
2925 /* result_len = self_len + count * (to_len-1) */
2926 product = count * (to_len-1);
2927 if (product / (to_len-1) != count) {
2928 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2929 return NULL;
2930 }
2931 result_len = self_len + product;
2932 if (result_len < 0) {
2933 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934 return NULL;
2935 }
2936
2937 if ( (result = (PyStringObject *)
2938 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2939 return NULL;
2940 result_s = PyString_AS_STRING(result);
2941
2942 start = self_s;
2943 end = self_s + self_len;
2944 while (count-- > 0) {
2945 next = findchar(start, end-start, from_c);
2946 if (next == NULL)
2947 break;
2948
2949 if (next == start) {
2950 /* replace with the 'to' */
2951 memcpy(result_s, to_s, to_len);
2952 result_s += to_len;
2953 start += 1;
2954 } else {
2955 /* copy the unchanged old then the 'to' */
2956 memcpy(result_s, start, next-start);
2957 result_s += (next-start);
2958 memcpy(result_s, to_s, to_len);
2959 result_s += to_len;
2960 start = next+1;
2961 }
2962 }
2963 /* Copy the remainder of the remaining string */
2964 memcpy(result_s, start, end-start);
2965
2966 return result;
2967}
2968
2969/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002970Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002971replace_substring(PyStringObject *self,
2972 PyStringObject *from,
2973 PyStringObject *to,
2974 Py_ssize_t maxcount) {
2975 char *self_s, *from_s, *to_s, *result_s;
2976 char *start, *next, *end;
2977 Py_ssize_t self_len, from_len, to_len, result_len;
2978 Py_ssize_t count, offset, product;
2979 PyStringObject *result;
2980
2981 self_s = PyString_AS_STRING(self);
2982 self_len = PyString_GET_SIZE(self);
2983 from_s = PyString_AS_STRING(from);
2984 from_len = PyString_GET_SIZE(from);
2985
2986 count = countstring(self_s, self_len,
2987 from_s, from_len,
2988 0, self_len, FORWARD);
2989 if (count > maxcount)
2990 count = maxcount;
2991
2992 if (count == 0) {
2993 /* no matches, return unchanged */
2994 return return_self(self);
2995 }
2996
2997 to_s = PyString_AS_STRING(to);
2998 to_len = PyString_GET_SIZE(to);
2999
3000 /* Check for overflow */
3001 /* result_len = self_len + count * (to_len-from_len) */
3002 product = count * (to_len-from_len);
3003 if (product / (to_len-from_len) != count) {
3004 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3005 return NULL;
3006 }
3007 result_len = self_len + product;
3008 if (result_len < 0) {
3009 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3010 return NULL;
3011 }
3012
3013 if ( (result = (PyStringObject *)
3014 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3015 return NULL;
3016 result_s = PyString_AS_STRING(result);
3017
3018 start = self_s;
3019 end = self_s + self_len;
3020 while (count-- > 0) {
3021 offset = findstring(start, end-start,
3022 from_s, from_len,
3023 0, end-start, FORWARD);
3024 if (offset == -1)
3025 break;
3026 next = start+offset;
3027 if (next == start) {
3028 /* replace with the 'to' */
3029 memcpy(result_s, to_s, to_len);
3030 result_s += to_len;
3031 start += from_len;
3032 } else {
3033 /* copy the unchanged old then the 'to' */
3034 memcpy(result_s, start, next-start);
3035 result_s += (next-start);
3036 memcpy(result_s, to_s, to_len);
3037 result_s += to_len;
3038 start = next+from_len;
3039 }
3040 }
3041 /* Copy the remainder of the remaining string */
3042 memcpy(result_s, start, end-start);
3043
3044 return result;
3045}
3046
3047
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003048Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003049replace(PyStringObject *self,
3050 PyStringObject *from,
3051 PyStringObject *to,
3052 Py_ssize_t maxcount)
3053{
3054 Py_ssize_t from_len, to_len;
3055
3056 if (maxcount < 0) {
3057 maxcount = PY_SSIZE_T_MAX;
3058 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3059 /* nothing to do; return the original string */
3060 return return_self(self);
3061 }
3062
3063 from_len = PyString_GET_SIZE(from);
3064 to_len = PyString_GET_SIZE(to);
3065
3066 if (maxcount == 0 ||
3067 (from_len == 0 && to_len == 0)) {
3068 /* nothing to do; return the original string */
3069 return return_self(self);
3070 }
3071
3072 /* Handle zero-length special cases */
3073
3074 if (from_len == 0) {
3075 /* insert the 'to' string everywhere. */
3076 /* >>> "Python".replace("", ".") */
3077 /* '.P.y.t.h.o.n.' */
3078 return replace_interleave(self, to, maxcount);
3079 }
3080
3081 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3082 /* point for an empty self string to generate a non-empty string */
3083 /* Special case so the remaining code always gets a non-empty string */
3084 if (PyString_GET_SIZE(self) == 0) {
3085 return return_self(self);
3086 }
3087
3088 if (to_len == 0) {
3089 /* delete all occurances of 'from' string */
3090 if (from_len == 1) {
3091 return replace_delete_single_character(
3092 self, PyString_AS_STRING(from)[0], maxcount);
3093 } else {
3094 return replace_delete_substring(self, from, maxcount);
3095 }
3096 }
3097
3098 /* Handle special case where both strings have the same length */
3099
3100 if (from_len == to_len) {
3101 if (from_len == 1) {
3102 return replace_single_character_in_place(
3103 self,
3104 PyString_AS_STRING(from)[0],
3105 PyString_AS_STRING(to)[0],
3106 maxcount);
3107 } else {
3108 return replace_substring_in_place(
3109 self, from, to, maxcount);
3110 }
3111 }
3112
3113 /* Otherwise use the more generic algorithms */
3114 if (from_len == 1) {
3115 return replace_single_character(self, PyString_AS_STRING(from)[0],
3116 to, maxcount);
3117 } else {
3118 /* len('from')>=2, len('to')>=1 */
3119 return replace_substring(self, from, to, maxcount);
3120 }
3121}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003122
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003123PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003124"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003125\n\
3126Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003127old replaced by new. If the optional argument count is\n\
3128given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003129
3130static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003131string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003132{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003133 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003134 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003135 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003136 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003138 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003139 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003140
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003141 if (PyString_Check(from)) {
3142 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003144#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003145 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003146 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003147 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003148#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003149 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 return NULL;
3151
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003152 if (PyString_Check(to)) {
3153 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003155#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003156 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003157 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003158 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003159#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003160 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 return NULL;
3162
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003163 return (PyObject *)replace((PyStringObject *) self,
3164 (PyStringObject *) from,
3165 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003166}
3167
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003168/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003169
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003170PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003171"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003172\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003173Return True if S starts with the specified prefix, False otherwise.\n\
3174With optional start, test S beginning at that position.\n\
3175With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003176
3177static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003178string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003179{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003180 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003181 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003183 Py_ssize_t plen;
3184 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003185 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003187
Guido van Rossumc6821402000-05-08 14:08:05 +00003188 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3189 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003190 return NULL;
3191 if (PyString_Check(subobj)) {
3192 prefix = PyString_AS_STRING(subobj);
3193 plen = PyString_GET_SIZE(subobj);
3194 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003195#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003196 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003197 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003198 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003199 subobj, start, end, -1);
3200 if (rc == -1)
3201 return NULL;
3202 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003203 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003204 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003205#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003206 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003207 return NULL;
3208
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003209 string_adjust_indices(&start, &end, len);
3210
3211 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003212 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003214 if (end-start >= plen)
3215 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3216 else
3217 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003218}
3219
3220
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003221PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003222"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003223\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003224Return True if S ends with the specified suffix, False otherwise.\n\
3225With optional start, test S beginning at that position.\n\
3226With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003227
3228static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003229string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003230{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003231 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003232 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003233 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003234 Py_ssize_t slen;
3235 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003236 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003237 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003238
Guido van Rossumc6821402000-05-08 14:08:05 +00003239 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3240 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003241 return NULL;
3242 if (PyString_Check(subobj)) {
3243 suffix = PyString_AS_STRING(subobj);
3244 slen = PyString_GET_SIZE(subobj);
3245 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003246#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003247 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003248 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003249 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003250 subobj, start, end, +1);
3251 if (rc == -1)
3252 return NULL;
3253 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003254 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003255 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003256#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003257 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003258 return NULL;
3259
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003260 string_adjust_indices(&start, &end, len);
3261
3262 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003263 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003264
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003265 if (end-slen > start)
3266 start = end - slen;
3267 if (end-start >= slen)
3268 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3269 else
3270 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003271}
3272
3273
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003274PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003275"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003276\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003277Encodes S using the codec registered for encoding. encoding defaults\n\
3278to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003279handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003280a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3281'xmlcharrefreplace' as well as any other name registered with\n\
3282codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003283
3284static PyObject *
3285string_encode(PyStringObject *self, PyObject *args)
3286{
3287 char *encoding = NULL;
3288 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003289 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003290
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003291 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3292 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003293 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003294 if (v == NULL)
3295 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003296 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3297 PyErr_Format(PyExc_TypeError,
3298 "encoder did not return a string/unicode object "
3299 "(type=%.400s)",
3300 v->ob_type->tp_name);
3301 Py_DECREF(v);
3302 return NULL;
3303 }
3304 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003305
3306 onError:
3307 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003308}
3309
3310
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003311PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003312"S.decode([encoding[,errors]]) -> object\n\
3313\n\
3314Decodes S using the codec registered for encoding. encoding defaults\n\
3315to the default encoding. errors may be given to set a different error\n\
3316handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003317a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3318as well as any other name registerd with codecs.register_error that is\n\
3319able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003320
3321static PyObject *
3322string_decode(PyStringObject *self, PyObject *args)
3323{
3324 char *encoding = NULL;
3325 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003326 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003327
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003328 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3329 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003330 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003331 if (v == NULL)
3332 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003333 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3334 PyErr_Format(PyExc_TypeError,
3335 "decoder did not return a string/unicode object "
3336 "(type=%.400s)",
3337 v->ob_type->tp_name);
3338 Py_DECREF(v);
3339 return NULL;
3340 }
3341 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003342
3343 onError:
3344 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003345}
3346
3347
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003348PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003349"S.expandtabs([tabsize]) -> string\n\
3350\n\
3351Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003352If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003353
3354static PyObject*
3355string_expandtabs(PyStringObject *self, PyObject *args)
3356{
3357 const char *e, *p;
3358 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003359 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003360 PyObject *u;
3361 int tabsize = 8;
3362
3363 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3364 return NULL;
3365
Thomas Wouters7e474022000-07-16 12:04:32 +00003366 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003367 i = j = 0;
3368 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3369 for (p = PyString_AS_STRING(self); p < e; p++)
3370 if (*p == '\t') {
3371 if (tabsize > 0)
3372 j += tabsize - (j % tabsize);
3373 }
3374 else {
3375 j++;
3376 if (*p == '\n' || *p == '\r') {
3377 i += j;
3378 j = 0;
3379 }
3380 }
3381
3382 /* Second pass: create output string and fill it */
3383 u = PyString_FromStringAndSize(NULL, i + j);
3384 if (!u)
3385 return NULL;
3386
3387 j = 0;
3388 q = PyString_AS_STRING(u);
3389
3390 for (p = PyString_AS_STRING(self); p < e; p++)
3391 if (*p == '\t') {
3392 if (tabsize > 0) {
3393 i = tabsize - (j % tabsize);
3394 j += i;
3395 while (i--)
3396 *q++ = ' ';
3397 }
3398 }
3399 else {
3400 j++;
3401 *q++ = *p;
3402 if (*p == '\n' || *p == '\r')
3403 j = 0;
3404 }
3405
3406 return u;
3407}
3408
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003409Py_LOCAL(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003410pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003411{
3412 PyObject *u;
3413
3414 if (left < 0)
3415 left = 0;
3416 if (right < 0)
3417 right = 0;
3418
Tim Peters8fa5dd02001-09-12 02:18:30 +00003419 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003420 Py_INCREF(self);
3421 return (PyObject *)self;
3422 }
3423
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003424 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003425 left + PyString_GET_SIZE(self) + right);
3426 if (u) {
3427 if (left)
3428 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003429 memcpy(PyString_AS_STRING(u) + left,
3430 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431 PyString_GET_SIZE(self));
3432 if (right)
3433 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3434 fill, right);
3435 }
3436
3437 return u;
3438}
3439
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003440PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003441"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003442"\n"
3443"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003444"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445
3446static PyObject *
3447string_ljust(PyStringObject *self, PyObject *args)
3448{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003449 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003450 char fillchar = ' ';
3451
Thomas Wouters4abb3662006-04-19 14:50:15 +00003452 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003453 return NULL;
3454
Tim Peters8fa5dd02001-09-12 02:18:30 +00003455 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003456 Py_INCREF(self);
3457 return (PyObject*) self;
3458 }
3459
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003460 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003461}
3462
3463
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003464PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003465"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003466"\n"
3467"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003468"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003469
3470static PyObject *
3471string_rjust(PyStringObject *self, PyObject *args)
3472{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003473 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003474 char fillchar = ' ';
3475
Thomas Wouters4abb3662006-04-19 14:50:15 +00003476 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003477 return NULL;
3478
Tim Peters8fa5dd02001-09-12 02:18:30 +00003479 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003480 Py_INCREF(self);
3481 return (PyObject*) self;
3482 }
3483
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003484 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003485}
3486
3487
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003488PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003489"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003490"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003491"Return S centered in a string of length width. Padding is\n"
3492"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003493
3494static PyObject *
3495string_center(PyStringObject *self, PyObject *args)
3496{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003497 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003498 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003499 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003500
Thomas Wouters4abb3662006-04-19 14:50:15 +00003501 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003502 return NULL;
3503
Tim Peters8fa5dd02001-09-12 02:18:30 +00003504 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505 Py_INCREF(self);
3506 return (PyObject*) self;
3507 }
3508
3509 marg = width - PyString_GET_SIZE(self);
3510 left = marg / 2 + (marg & width & 1);
3511
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003512 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003513}
3514
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003515PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003516"S.zfill(width) -> string\n"
3517"\n"
3518"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003519"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003520
3521static PyObject *
3522string_zfill(PyStringObject *self, PyObject *args)
3523{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003524 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003525 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003526 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003527 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003528
Thomas Wouters4abb3662006-04-19 14:50:15 +00003529 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003530 return NULL;
3531
3532 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003533 if (PyString_CheckExact(self)) {
3534 Py_INCREF(self);
3535 return (PyObject*) self;
3536 }
3537 else
3538 return PyString_FromStringAndSize(
3539 PyString_AS_STRING(self),
3540 PyString_GET_SIZE(self)
3541 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003542 }
3543
3544 fill = width - PyString_GET_SIZE(self);
3545
3546 s = pad(self, fill, 0, '0');
3547
3548 if (s == NULL)
3549 return NULL;
3550
3551 p = PyString_AS_STRING(s);
3552 if (p[fill] == '+' || p[fill] == '-') {
3553 /* move sign to beginning of string */
3554 p[0] = p[fill];
3555 p[fill] = '0';
3556 }
3557
3558 return (PyObject*) s;
3559}
3560
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003561PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003562"S.isspace() -> bool\n\
3563\n\
3564Return True if all characters in S are whitespace\n\
3565and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566
3567static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003568string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003569{
Fred Drakeba096332000-07-09 07:04:36 +00003570 register const unsigned char *p
3571 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003572 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003573
Guido van Rossum4c08d552000-03-10 22:55:18 +00003574 /* Shortcut for single character strings */
3575 if (PyString_GET_SIZE(self) == 1 &&
3576 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003577 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003578
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003579 /* Special case for empty strings */
3580 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003582
Guido van Rossum4c08d552000-03-10 22:55:18 +00003583 e = p + PyString_GET_SIZE(self);
3584 for (; p < e; p++) {
3585 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003586 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003587 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003588 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003589}
3590
3591
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003592PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003593"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003594\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003595Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003596and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003597
3598static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003599string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003600{
Fred Drakeba096332000-07-09 07:04:36 +00003601 register const unsigned char *p
3602 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003603 register const unsigned char *e;
3604
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605 /* Shortcut for single character strings */
3606 if (PyString_GET_SIZE(self) == 1 &&
3607 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003608 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003609
3610 /* Special case for empty strings */
3611 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003613
3614 e = p + PyString_GET_SIZE(self);
3615 for (; p < e; p++) {
3616 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003618 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003620}
3621
3622
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003623PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003624"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003625\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003626Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003627and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003628
3629static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003630string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003631{
Fred Drakeba096332000-07-09 07:04:36 +00003632 register const unsigned char *p
3633 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003634 register const unsigned char *e;
3635
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003636 /* Shortcut for single character strings */
3637 if (PyString_GET_SIZE(self) == 1 &&
3638 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003639 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003640
3641 /* Special case for empty strings */
3642 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003643 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003644
3645 e = p + PyString_GET_SIZE(self);
3646 for (; p < e; p++) {
3647 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003649 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003651}
3652
3653
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003654PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003655"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003657Return True if all characters in S are digits\n\
3658and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659
3660static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003661string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003662{
Fred Drakeba096332000-07-09 07:04:36 +00003663 register const unsigned char *p
3664 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003665 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667 /* Shortcut for single character strings */
3668 if (PyString_GET_SIZE(self) == 1 &&
3669 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003672 /* Special case for empty strings */
3673 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003674 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003675
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676 e = p + PyString_GET_SIZE(self);
3677 for (; p < e; p++) {
3678 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003679 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003681 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682}
3683
3684
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003685PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003686"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003689at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003690
3691static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003692string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003693{
Fred Drakeba096332000-07-09 07:04:36 +00003694 register const unsigned char *p
3695 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003696 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697 int cased;
3698
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699 /* Shortcut for single character strings */
3700 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003703 /* Special case for empty strings */
3704 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003705 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003706
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707 e = p + PyString_GET_SIZE(self);
3708 cased = 0;
3709 for (; p < e; p++) {
3710 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003711 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 else if (!cased && islower(*p))
3713 cased = 1;
3714 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003715 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716}
3717
3718
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003719PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003722Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003723at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724
3725static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003726string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003727{
Fred Drakeba096332000-07-09 07:04:36 +00003728 register const unsigned char *p
3729 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003730 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731 int cased;
3732
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733 /* Shortcut for single character strings */
3734 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003735 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003737 /* Special case for empty strings */
3738 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003739 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003740
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741 e = p + PyString_GET_SIZE(self);
3742 cased = 0;
3743 for (; p < e; p++) {
3744 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003745 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746 else if (!cased && isupper(*p))
3747 cased = 1;
3748 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003749 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750}
3751
3752
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003753PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003756Return True if S is a titlecased string and there is at least one\n\
3757character in S, i.e. uppercase characters may only follow uncased\n\
3758characters and lowercase characters only cased ones. Return False\n\
3759otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760
3761static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003762string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003763{
Fred Drakeba096332000-07-09 07:04:36 +00003764 register const unsigned char *p
3765 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003766 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003767 int cased, previous_is_cased;
3768
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769 /* Shortcut for single character strings */
3770 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003771 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003773 /* Special case for empty strings */
3774 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003775 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003776
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777 e = p + PyString_GET_SIZE(self);
3778 cased = 0;
3779 previous_is_cased = 0;
3780 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003781 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782
3783 if (isupper(ch)) {
3784 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003785 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786 previous_is_cased = 1;
3787 cased = 1;
3788 }
3789 else if (islower(ch)) {
3790 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003791 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003792 previous_is_cased = 1;
3793 cased = 1;
3794 }
3795 else
3796 previous_is_cased = 0;
3797 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003798 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003799}
3800
3801
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003802PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003803"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804\n\
3805Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003806Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003807is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809static PyObject*
3810string_splitlines(PyStringObject *self, PyObject *args)
3811{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003812 register Py_ssize_t i;
3813 register Py_ssize_t j;
3814 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003815 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816 PyObject *list;
3817 PyObject *str;
3818 char *data;
3819
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003820 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003821 return NULL;
3822
3823 data = PyString_AS_STRING(self);
3824 len = PyString_GET_SIZE(self);
3825
Guido van Rossum4c08d552000-03-10 22:55:18 +00003826 list = PyList_New(0);
3827 if (!list)
3828 goto onError;
3829
3830 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003831 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003832
Guido van Rossum4c08d552000-03-10 22:55:18 +00003833 /* Find a line and append it */
3834 while (i < len && data[i] != '\n' && data[i] != '\r')
3835 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836
3837 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003838 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003839 if (i < len) {
3840 if (data[i] == '\r' && i + 1 < len &&
3841 data[i+1] == '\n')
3842 i += 2;
3843 else
3844 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003845 if (keepends)
3846 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003847 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003848 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003849 j = i;
3850 }
3851 if (j < len) {
3852 SPLIT_APPEND(data, j, len);
3853 }
3854
3855 return list;
3856
3857 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003858 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003859 return NULL;
3860}
3861
3862#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003863#undef SPLIT_ADD
3864#undef MAX_PREALLOC
3865#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003866
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003867static PyObject *
3868string_getnewargs(PyStringObject *v)
3869{
3870 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3871}
3872
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003873
/* Method table for the str type; it is installed through the tp_methods
   slot of PyString_Type.  Each entry gives the Python-visible name, the
   C function (cast to PyCFunction), the calling-convention flag
   (METH_NOARGS, METH_O or METH_VARARGS) and, where present, the
   docstring.  The table ends with a NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    /* No docstring is supplied for __getnewargs__. */
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
3924
Jeremy Hylton938ace62002-07-17 16:30:39 +00003925static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003926str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3927
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003928static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003929string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003930{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003931 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003932 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003933
Guido van Rossumae960af2001-08-30 03:11:59 +00003934 if (type != &PyString_Type)
3935 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003936 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3937 return NULL;
3938 if (x == NULL)
3939 return PyString_FromString("");
3940 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003941}
3942
Guido van Rossumae960af2001-08-30 03:11:59 +00003943static PyObject *
3944str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3945{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003946 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003947 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003948
3949 assert(PyType_IsSubtype(type, &PyString_Type));
3950 tmp = string_new(&PyString_Type, args, kwds);
3951 if (tmp == NULL)
3952 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003953 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003954 n = PyString_GET_SIZE(tmp);
3955 pnew = type->tp_alloc(type, n);
3956 if (pnew != NULL) {
3957 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003958 ((PyStringObject *)pnew)->ob_shash =
3959 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003960 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003961 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003962 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003963 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003964}
3965
Guido van Rossumcacfc072002-05-24 19:01:59 +00003966static PyObject *
3967basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3968{
3969 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003970 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003971 return NULL;
3972}
3973
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003974static PyObject *
3975string_mod(PyObject *v, PyObject *w)
3976{
3977 if (!PyString_Check(v)) {
3978 Py_INCREF(Py_NotImplemented);
3979 return Py_NotImplemented;
3980 }
3981 return PyString_Format(v, w);
3982}
3983
/* Docstring for the basestring type; used as tp_doc of
   PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003986
/* Number protocol table for str (tp_as_number of PyString_Type).
   Only nb_remainder is filled in, routing the `%` operator to
   string_mod(); the slots not listed after it are left to C's implicit
   zero-initialisation. */
static PyNumberMethods string_as_number = {
    0, /*nb_add*/
    0, /*nb_subtract*/
    0, /*nb_multiply*/
    0, /*nb_divide*/
    string_mod, /*nb_remainder*/
};
3994
3995
/* Type object for `basestring`.  It derives from PyBaseObject_Type, may
   itself be subclassed (Py_TPFLAGS_BASETYPE) and uses basestring_new as
   tp_new, which always raises TypeError.  PyString_Type names this type
   as its tp_base. */
PyTypeObject PyBaseString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0, /* ob_size */
    "basestring", /* tp_name */
    0, /* tp_basicsize */
    0, /* tp_itemsize */
    0, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    0, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    &PyBaseObject_Type, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    basestring_new, /* tp_new */
    0, /* tp_free */
};
4038
/* Docstring for the str type; used as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for `str`.  Instances are variable-sized: the basic size
   is sizeof(PyStringObject) and each item is one char.  The type
   derives from PyBaseString_Type, may be subclassed, and wires in the
   number, sequence, mapping and buffer tables together with the
   string_methods table. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0, /* ob_size */
    "str", /* tp_name */
    sizeof(PyStringObject), /* tp_basicsize */
    sizeof(char), /* tp_itemsize */
    string_dealloc, /* tp_dealloc */
    (printfunc)string_print, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    string_repr, /* tp_repr */
    &string_as_number, /* tp_as_number */
    &string_as_sequence, /* tp_as_sequence */
    &string_as_mapping, /* tp_as_mapping */
    (hashfunc)string_hash, /* tp_hash */
    0, /* tp_call */
    string_str, /* tp_str */
    PyObject_GenericGetAttr, /* tp_getattro */
    0, /* tp_setattro */
    &string_as_buffer, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE, /* tp_flags */
    string_doc, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    (richcmpfunc)string_richcompare, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    string_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    &PyBaseString_Type, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    string_new, /* tp_new */
    PyObject_Del, /* tp_free */
};
4088
4089void
Fred Drakeba096332000-07-09 07:04:36 +00004090PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004091{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004092 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004093 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004094 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004095 if (w == NULL || !PyString_Check(*pv)) {
4096 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004097 *pv = NULL;
4098 return;
4099 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004100 v = string_concat((PyStringObject *) *pv, w);
4101 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004102 *pv = v;
4103}
4104
/* Same as PyString_Concat(pv, w), but additionally releases the
   caller's reference to w afterwards.  Py_XDECREF is used, so w may be
   NULL. */
void
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyString_Concat(pv, w);
    Py_XDECREF(w);
}
4111
4112
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004113/* The following function breaks the notion that strings are immutable:
4114 it changes the size of a string. We get away with this only if there
4115 is only one module referencing the object. You can also think of it
4116 as creating a new string object and destroying the old one, only
4117 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004118 already be known to some other part of the code...
4119 Note that if there's not enough memory to resize the string, the original
4120 string object at *pv is deallocated, *pv is set to NULL, an "out of
4121 memory" exception is set, and -1 is returned. Else (on success) 0 is
4122 returned, and the value in *pv may or may not be the same as on input.
4123 As always, an extra byte is allocated for a trailing \0 byte (newsize
4124 does *not* include that), and a trailing \0 byte is stored.
4125*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004126
4127int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004128_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004129{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004130 register PyObject *v;
4131 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004132 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004133 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4134 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004135 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004136 Py_DECREF(v);
4137 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004138 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004139 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004140 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004141 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004142 _Py_ForgetReference(v);
4143 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004144 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004145 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004146 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004147 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004148 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004150 _Py_NewReference(*pv);
4151 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004152 sv->ob_size = newsize;
4153 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004154 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004155 return 0;
4156}
Guido van Rossume5372401993-03-16 12:15:04 +00004157
4158/* Helpers for formatstring */
4159
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004160Py_LOCAL(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004161getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004162{
Thomas Wouters977485d2006-02-16 15:59:12 +00004163 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004164 if (argidx < arglen) {
4165 (*p_argidx)++;
4166 if (arglen < 0)
4167 return args;
4168 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004169 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004170 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004171 PyErr_SetString(PyExc_TypeError,
4172 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004173 return NULL;
4174}
4175
/* Format codes
 * F_LJUST '-'
 * F_SIGN '+'
 * F_BLANK ' '
 * F_ALT '#'
 * F_ZERO '0'
 *
 * Each flag occupies its own bit, so several can be combined in one
 * int `flags` value and tested with `&` (e.g. `flags & F_ALT`).
 */
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
4188
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004189Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004190formatfloat(char *buf, size_t buflen, int flags,
4191 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004192{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004193 /* fmt = '%#.' + `prec` + `type`
4194 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004195 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004196 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004197 x = PyFloat_AsDouble(v);
4198 if (x == -1.0 && PyErr_Occurred()) {
4199 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004200 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004201 }
Guido van Rossume5372401993-03-16 12:15:04 +00004202 if (prec < 0)
4203 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004204 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4205 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004206 /* Worst case length calc to ensure no buffer overrun:
4207
4208 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004209 fmt = %#.<prec>g
4210 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004211 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004212 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004213
4214 'f' formats:
4215 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4216 len = 1 + 50 + 1 + prec = 52 + prec
4217
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004218 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004219 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004220
4221 */
4222 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4223 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004224 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004225 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004226 return -1;
4227 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004228 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4229 (flags&F_ALT) ? "#" : "",
4230 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004231 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004232 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004233}
4234
Tim Peters38fd5b62000-09-21 05:43:11 +00004235/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4236 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4237 * Python's regular ints.
4238 * Return value: a new PyString*, or NULL if error.
4239 * . *pbuf is set to point into it,
4240 * *plen set to the # of chars following that.
4241 * Caller must decref it when done using pbuf.
4242 * The string starting at *pbuf is of the form
4243 * "-"? ("0x" | "0X")? digit+
4244 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004245 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004246 * There will be at least prec digits, zero-filled on the left if
4247 * necessary to get that many.
4248 * val object to be converted
4249 * flags bitmask of format flags; only F_ALT is looked at
4250 * prec minimum number of digits; 0-fill on left if needed
4251 * type a character in [duoxX]; u acts the same as d
4252 *
4253 * CAUTION: o, x and X conversions on regular ints can never
4254 * produce a '-' sign, but can for Python's unbounded ints.
4255 */
4256PyObject*
4257_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4258 char **pbuf, int *plen)
4259{
4260 PyObject *result = NULL;
4261 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004262 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004263 int sign; /* 1 if '-', else 0 */
4264 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004265 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004266 int numdigits; /* len == numnondigits + numdigits */
4267 int numnondigits = 0;
4268
4269 switch (type) {
4270 case 'd':
4271 case 'u':
4272 result = val->ob_type->tp_str(val);
4273 break;
4274 case 'o':
4275 result = val->ob_type->tp_as_number->nb_oct(val);
4276 break;
4277 case 'x':
4278 case 'X':
4279 numnondigits = 2;
4280 result = val->ob_type->tp_as_number->nb_hex(val);
4281 break;
4282 default:
4283 assert(!"'type' not in [duoxX]");
4284 }
4285 if (!result)
4286 return NULL;
4287
4288 /* To modify the string in-place, there can only be one reference. */
4289 if (result->ob_refcnt != 1) {
4290 PyErr_BadInternalCall();
4291 return NULL;
4292 }
4293 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004294 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004295 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004296 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4297 return NULL;
4298 }
4299 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004300 if (buf[len-1] == 'L') {
4301 --len;
4302 buf[len] = '\0';
4303 }
4304 sign = buf[0] == '-';
4305 numnondigits += sign;
4306 numdigits = len - numnondigits;
4307 assert(numdigits > 0);
4308
Tim Petersfff53252001-04-12 18:38:48 +00004309 /* Get rid of base marker unless F_ALT */
4310 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004311 /* Need to skip 0x, 0X or 0. */
4312 int skipped = 0;
4313 switch (type) {
4314 case 'o':
4315 assert(buf[sign] == '0');
4316 /* If 0 is only digit, leave it alone. */
4317 if (numdigits > 1) {
4318 skipped = 1;
4319 --numdigits;
4320 }
4321 break;
4322 case 'x':
4323 case 'X':
4324 assert(buf[sign] == '0');
4325 assert(buf[sign + 1] == 'x');
4326 skipped = 2;
4327 numnondigits -= 2;
4328 break;
4329 }
4330 if (skipped) {
4331 buf += skipped;
4332 len -= skipped;
4333 if (sign)
4334 buf[0] = '-';
4335 }
4336 assert(len == numnondigits + numdigits);
4337 assert(numdigits > 0);
4338 }
4339
4340 /* Fill with leading zeroes to meet minimum width. */
4341 if (prec > numdigits) {
4342 PyObject *r1 = PyString_FromStringAndSize(NULL,
4343 numnondigits + prec);
4344 char *b1;
4345 if (!r1) {
4346 Py_DECREF(result);
4347 return NULL;
4348 }
4349 b1 = PyString_AS_STRING(r1);
4350 for (i = 0; i < numnondigits; ++i)
4351 *b1++ = *buf++;
4352 for (i = 0; i < prec - numdigits; i++)
4353 *b1++ = '0';
4354 for (i = 0; i < numdigits; i++)
4355 *b1++ = *buf++;
4356 *b1 = '\0';
4357 Py_DECREF(result);
4358 result = r1;
4359 buf = PyString_AS_STRING(result);
4360 len = numnondigits + prec;
4361 }
4362
4363 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004364 if (type == 'X') {
4365 /* Need to convert all lower case letters to upper case.
4366 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004367 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004368 if (buf[i] >= 'a' && buf[i] <= 'x')
4369 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004370 }
4371 *pbuf = buf;
4372 *plen = len;
4373 return result;
4374}
4375
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004376Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004377formatint(char *buf, size_t buflen, int flags,
4378 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004379{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004380 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004381 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4382 + 1 + 1 = 24 */
4383 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004384 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004385 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004386
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004387 x = PyInt_AsLong(v);
4388 if (x == -1 && PyErr_Occurred()) {
4389 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004390 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004391 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004392 if (x < 0 && type == 'u') {
4393 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004394 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004395 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4396 sign = "-";
4397 else
4398 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004399 if (prec < 0)
4400 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004401
4402 if ((flags & F_ALT) &&
4403 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004404 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004405 * of issues that cause pain:
4406 * - when 0 is being converted, the C standard leaves off
4407 * the '0x' or '0X', which is inconsistent with other
4408 * %#x/%#X conversions and inconsistent with Python's
4409 * hex() function
4410 * - there are platforms that violate the standard and
4411 * convert 0 with the '0x' or '0X'
4412 * (Metrowerks, Compaq Tru64)
4413 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004414 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004415 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004416 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004417 * We can achieve the desired consistency by inserting our
4418 * own '0x' or '0X' prefix, and substituting %x/%X in place
4419 * of %#x/%#X.
4420 *
4421 * Note that this is the same approach as used in
4422 * formatint() in unicodeobject.c
4423 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004424 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4425 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004426 }
4427 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004428 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4429 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004430 prec, type);
4431 }
4432
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004433 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4434 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004435 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004436 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004437 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004438 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004439 return -1;
4440 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004441 if (sign[0])
4442 PyOS_snprintf(buf, buflen, fmt, -x);
4443 else
4444 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004445 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004446}
4447
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004448Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004449formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004450{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004451 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004452 if (PyString_Check(v)) {
4453 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004454 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004455 }
4456 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004457 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004458 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004459 }
4460 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004461 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004462}
4463
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004464/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4465
4466 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4467 chars are formatted. XXX This is a magic number. Each formatting
4468 routine does bounds checking to ensure no overflow, but a better
4469 solution may be to malloc a buffer of appropriate size for each
4470 format. For now, the current solution is sufficient.
4471*/
4472#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004473
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004474PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004475PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004476{
4477 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004478 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004479 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004480 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004481 PyObject *result, *orig_args;
4482#ifdef Py_USING_UNICODE
4483 PyObject *v, *w;
4484#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004485 PyObject *dict = NULL;
4486 if (format == NULL || !PyString_Check(format) || args == NULL) {
4487 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004488 return NULL;
4489 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004490 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004491 fmt = PyString_AS_STRING(format);
4492 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004493 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004494 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004495 if (result == NULL)
4496 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004497 res = PyString_AsString(result);
4498 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004499 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004500 argidx = 0;
4501 }
4502 else {
4503 arglen = -1;
4504 argidx = -2;
4505 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004506 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4507 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004508 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004509 while (--fmtcnt >= 0) {
4510 if (*fmt != '%') {
4511 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004512 rescnt = fmtcnt + 100;
4513 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004514 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004515 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004516 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004517 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004518 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004519 }
4520 *res++ = *fmt++;
4521 }
4522 else {
4523 /* Got a format specifier */
4524 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004525 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004526 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004527 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004528 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004529 PyObject *v = NULL;
4530 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004531 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004532 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004533 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004534 char formatbuf[FORMATBUFLEN];
4535 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004536#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004537 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004538 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004539#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004540
Guido van Rossumda9c2711996-12-05 21:58:58 +00004541 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004542 if (*fmt == '(') {
4543 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004544 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004545 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004546 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004547
4548 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004549 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004550 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004551 goto error;
4552 }
4553 ++fmt;
4554 --fmtcnt;
4555 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004556 /* Skip over balanced parentheses */
4557 while (pcount > 0 && --fmtcnt >= 0) {
4558 if (*fmt == ')')
4559 --pcount;
4560 else if (*fmt == '(')
4561 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004562 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004563 }
4564 keylen = fmt - keystart - 1;
4565 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004566 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004567 "incomplete format key");
4568 goto error;
4569 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004570 key = PyString_FromStringAndSize(keystart,
4571 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004572 if (key == NULL)
4573 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004574 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004575 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004576 args_owned = 0;
4577 }
4578 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004579 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004580 if (args == NULL) {
4581 goto error;
4582 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004583 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004584 arglen = -1;
4585 argidx = -2;
4586 }
Guido van Rossume5372401993-03-16 12:15:04 +00004587 while (--fmtcnt >= 0) {
4588 switch (c = *fmt++) {
4589 case '-': flags |= F_LJUST; continue;
4590 case '+': flags |= F_SIGN; continue;
4591 case ' ': flags |= F_BLANK; continue;
4592 case '#': flags |= F_ALT; continue;
4593 case '0': flags |= F_ZERO; continue;
4594 }
4595 break;
4596 }
4597 if (c == '*') {
4598 v = getnextarg(args, arglen, &argidx);
4599 if (v == NULL)
4600 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004601 if (!PyInt_Check(v)) {
4602 PyErr_SetString(PyExc_TypeError,
4603 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004604 goto error;
4605 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004606 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004607 if (width < 0) {
4608 flags |= F_LJUST;
4609 width = -width;
4610 }
Guido van Rossume5372401993-03-16 12:15:04 +00004611 if (--fmtcnt >= 0)
4612 c = *fmt++;
4613 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004614 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004615 width = c - '0';
4616 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004617 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004618 if (!isdigit(c))
4619 break;
4620 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004621 PyErr_SetString(
4622 PyExc_ValueError,
4623 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004624 goto error;
4625 }
4626 width = width*10 + (c - '0');
4627 }
4628 }
4629 if (c == '.') {
4630 prec = 0;
4631 if (--fmtcnt >= 0)
4632 c = *fmt++;
4633 if (c == '*') {
4634 v = getnextarg(args, arglen, &argidx);
4635 if (v == NULL)
4636 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004637 if (!PyInt_Check(v)) {
4638 PyErr_SetString(
4639 PyExc_TypeError,
4640 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004641 goto error;
4642 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004643 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004644 if (prec < 0)
4645 prec = 0;
4646 if (--fmtcnt >= 0)
4647 c = *fmt++;
4648 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004649 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004650 prec = c - '0';
4651 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004652 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004653 if (!isdigit(c))
4654 break;
4655 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004656 PyErr_SetString(
4657 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004658 "prec too big");
4659 goto error;
4660 }
4661 prec = prec*10 + (c - '0');
4662 }
4663 }
4664 } /* prec */
4665 if (fmtcnt >= 0) {
4666 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004667 if (--fmtcnt >= 0)
4668 c = *fmt++;
4669 }
4670 }
4671 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004672 PyErr_SetString(PyExc_ValueError,
4673 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004674 goto error;
4675 }
4676 if (c != '%') {
4677 v = getnextarg(args, arglen, &argidx);
4678 if (v == NULL)
4679 goto error;
4680 }
4681 sign = 0;
4682 fill = ' ';
4683 switch (c) {
4684 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004685 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004686 len = 1;
4687 break;
4688 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004689#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004690 if (PyUnicode_Check(v)) {
4691 fmt = fmt_start;
4692 argidx = argidx_start;
4693 goto unicode;
4694 }
Georg Brandld45014b2005-10-01 17:06:00 +00004695#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004696 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004697#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004698 if (temp != NULL && PyUnicode_Check(temp)) {
4699 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004700 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004701 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004702 goto unicode;
4703 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004704#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004705 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004706 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004707 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004708 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004709 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004710 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004711 if (!PyString_Check(temp)) {
4712 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004713 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004714 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004715 goto error;
4716 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004717 pbuf = PyString_AS_STRING(temp);
4718 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004719 if (prec >= 0 && len > prec)
4720 len = prec;
4721 break;
4722 case 'i':
4723 case 'd':
4724 case 'u':
4725 case 'o':
4726 case 'x':
4727 case 'X':
4728 if (c == 'i')
4729 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004730 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004731 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004732 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004733 prec, c, &pbuf, &ilen);
4734 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004735 if (!temp)
4736 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004737 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004738 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004739 else {
4740 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004741 len = formatint(pbuf,
4742 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004743 flags, prec, c, v);
4744 if (len < 0)
4745 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004746 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004747 }
4748 if (flags & F_ZERO)
4749 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004750 break;
4751 case 'e':
4752 case 'E':
4753 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004754 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004755 case 'g':
4756 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004757 if (c == 'F')
4758 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004759 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004760 len = formatfloat(pbuf, sizeof(formatbuf),
4761 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004762 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004763 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004764 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004765 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004766 fill = '0';
4767 break;
4768 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004769#ifdef Py_USING_UNICODE
4770 if (PyUnicode_Check(v)) {
4771 fmt = fmt_start;
4772 argidx = argidx_start;
4773 goto unicode;
4774 }
4775#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004776 pbuf = formatbuf;
4777 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004778 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004779 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004780 break;
4781 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004782 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004783 "unsupported format character '%c' (0x%x) "
4784 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004785 c, c,
4786 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004787 goto error;
4788 }
4789 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004790 if (*pbuf == '-' || *pbuf == '+') {
4791 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004792 len--;
4793 }
4794 else if (flags & F_SIGN)
4795 sign = '+';
4796 else if (flags & F_BLANK)
4797 sign = ' ';
4798 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004799 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004800 }
4801 if (width < len)
4802 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004803 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004804 reslen -= rescnt;
4805 rescnt = width + fmtcnt + 100;
4806 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004807 if (reslen < 0) {
4808 Py_DECREF(result);
4809 return PyErr_NoMemory();
4810 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004811 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004812 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004813 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004814 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004815 }
4816 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004817 if (fill != ' ')
4818 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004819 rescnt--;
4820 if (width > len)
4821 width--;
4822 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004823 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4824 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004825 assert(pbuf[1] == c);
4826 if (fill != ' ') {
4827 *res++ = *pbuf++;
4828 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004829 }
Tim Petersfff53252001-04-12 18:38:48 +00004830 rescnt -= 2;
4831 width -= 2;
4832 if (width < 0)
4833 width = 0;
4834 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004835 }
4836 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004837 do {
4838 --rescnt;
4839 *res++ = fill;
4840 } while (--width > len);
4841 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004842 if (fill == ' ') {
4843 if (sign)
4844 *res++ = sign;
4845 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004846 (c == 'x' || c == 'X')) {
4847 assert(pbuf[0] == '0');
4848 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004849 *res++ = *pbuf++;
4850 *res++ = *pbuf++;
4851 }
4852 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004853 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004854 res += len;
4855 rescnt -= len;
4856 while (--width >= len) {
4857 --rescnt;
4858 *res++ = ' ';
4859 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004860 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004861 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004862 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004863 goto error;
4864 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004865 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004866 } /* '%' */
4867 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004868 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004869 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004870 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004871 goto error;
4872 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004873 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004874 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004875 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004876 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004877 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004878
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004879#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004880 unicode:
4881 if (args_owned) {
4882 Py_DECREF(args);
4883 args_owned = 0;
4884 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004885 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004886 if (PyTuple_Check(orig_args) && argidx > 0) {
4887 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004888 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004889 v = PyTuple_New(n);
4890 if (v == NULL)
4891 goto error;
4892 while (--n >= 0) {
4893 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4894 Py_INCREF(w);
4895 PyTuple_SET_ITEM(v, n, w);
4896 }
4897 args = v;
4898 } else {
4899 Py_INCREF(orig_args);
4900 args = orig_args;
4901 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004902 args_owned = 1;
4903 /* Take what we have of the result and let the Unicode formatting
4904 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004905 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004906 if (_PyString_Resize(&result, rescnt))
4907 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004908 fmtcnt = PyString_GET_SIZE(format) - \
4909 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004910 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4911 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004912 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004913 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004914 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004915 if (v == NULL)
4916 goto error;
4917 /* Paste what we have (result) to what the Unicode formatting
4918 function returned (v) and return the result (or error) */
4919 w = PyUnicode_Concat(result, v);
4920 Py_DECREF(result);
4921 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004922 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004923 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004924#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004925
Guido van Rossume5372401993-03-16 12:15:04 +00004926 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004927 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004928 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004929 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004930 }
Guido van Rossume5372401993-03-16 12:15:04 +00004931 return NULL;
4932}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004933
Guido van Rossum2a61e741997-01-18 07:55:05 +00004934void
Fred Drakeba096332000-07-09 07:04:36 +00004935PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004936{
4937 register PyStringObject *s = (PyStringObject *)(*p);
4938 PyObject *t;
4939 if (s == NULL || !PyString_Check(s))
4940 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004941 /* If it's a string subclass, we don't really know what putting
4942 it in the interned dict might do. */
4943 if (!PyString_CheckExact(s))
4944 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004945 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004946 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004947 if (interned == NULL) {
4948 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004949 if (interned == NULL) {
4950 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004951 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004952 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004953 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004954 t = PyDict_GetItem(interned, (PyObject *)s);
4955 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004956 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004957 Py_DECREF(*p);
4958 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004959 return;
4960 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004961
Armin Rigo79f7ad22004-08-07 19:27:39 +00004962 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004963 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004964 return;
4965 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004966 /* The two references in interned are not counted by refcnt.
4967 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004968 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004969 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004970}
4971
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004972void
4973PyString_InternImmortal(PyObject **p)
4974{
4975 PyString_InternInPlace(p);
4976 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4977 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4978 Py_INCREF(*p);
4979 }
4980}
4981
Guido van Rossum2a61e741997-01-18 07:55:05 +00004982
4983PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004984PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004985{
4986 PyObject *s = PyString_FromString(cp);
4987 if (s == NULL)
4988 return NULL;
4989 PyString_InternInPlace(&s);
4990 return s;
4991}
4992
Guido van Rossum8cf04761997-08-02 02:57:45 +00004993void
Fred Drakeba096332000-07-09 07:04:36 +00004994PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004995{
4996 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004997 for (i = 0; i < UCHAR_MAX + 1; i++) {
4998 Py_XDECREF(characters[i]);
4999 characters[i] = NULL;
5000 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005001 Py_XDECREF(nullstring);
5002 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005003}
Barry Warsawa903ad982001-02-23 16:40:48 +00005004
Barry Warsawa903ad982001-02-23 16:40:48 +00005005void _Py_ReleaseInternedStrings(void)
5006{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005007 PyObject *keys;
5008 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005009 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005010
5011 if (interned == NULL || !PyDict_Check(interned))
5012 return;
5013 keys = PyDict_Keys(interned);
5014 if (keys == NULL || !PyList_Check(keys)) {
5015 PyErr_Clear();
5016 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005017 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005018
5019 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5020 detector, interned strings are not forcibly deallocated; rather, we
5021 give them their stolen references back, and then clear and DECREF
5022 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005023
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005024 fprintf(stderr, "releasing interned strings\n");
5025 n = PyList_GET_SIZE(keys);
5026 for (i = 0; i < n; i++) {
5027 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5028 switch (s->ob_sstate) {
5029 case SSTATE_NOT_INTERNED:
5030 /* XXX Shouldn't happen */
5031 break;
5032 case SSTATE_INTERNED_IMMORTAL:
5033 s->ob_refcnt += 1;
5034 break;
5035 case SSTATE_INTERNED_MORTAL:
5036 s->ob_refcnt += 2;
5037 break;
5038 default:
5039 Py_FatalError("Inconsistent interned string state.");
5040 }
5041 s->ob_sstate = SSTATE_NOT_INTERNED;
5042 }
5043 Py_DECREF(keys);
5044 PyDict_Clear(interned);
5045 Py_DECREF(interned);
5046 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005047}