blob: 75325ab84a1dcff0445ec147c27980e864b5d6bd [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many times the cached empty string and a
   cached one-character string, respectively, were handed out instead of
   allocating a new object. */
int null_strings, one_strings;
#endif

/* Lazily filled cache of shared one-character strings, indexed by the
   character value masked with UCHAR_MAX. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* Shared empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25
26
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000027/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000028 For both PyString_FromString() and PyString_FromStringAndSize(), the
29 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000030 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000031
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000032 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000033 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000034
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000035 For PyString_FromStringAndSize(), the parameter the parameter `str' is
36 either NULL or else points to a string containing at least `size' bytes.
37 For PyString_FromStringAndSize(), the string in the `str' parameter does
38 not have to be null-terminated. (Therefore it is safe to construct a
39 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
40 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
41 bytes (setting the last byte to the null terminating character) and you can
42 fill in the data yourself. If `str' is non-NULL then the resulting
43 PyString object must be treated as immutable and you must not fill in nor
44 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000045
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000046 The PyObject member `op->ob_size', which denotes the number of "extra
47 items" in a variable-size object, will contain the number of bytes
48 allocated for string data, not counting the null terminating character. It
49 is therefore equal to the equal to the `size' parameter (for
50 PyString_FromStringAndSize()) or the length of the string in the `str'
51 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000052*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000054PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000055{
Tim Peters9e897f42001-05-09 07:37:07 +000056 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000057 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058 if (size == 0 && (op = nullstring) != NULL) {
59#ifdef COUNT_ALLOCS
60 null_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 if (size == 1 && str != NULL &&
66 (op = characters[*str & UCHAR_MAX]) != NULL)
67 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068#ifdef COUNT_ALLOCS
69 one_strings++;
70#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 Py_INCREF(op);
72 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000075 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000076 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000077 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000078 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000079 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000080 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000081 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (str != NULL)
83 memcpy(op->ob_sval, str, size);
84 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000085 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000086 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000087 PyObject *t = (PyObject *)op;
88 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000089 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000093 PyObject *t = (PyObject *)op;
94 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000095 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000103PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104{
Tim Peters62de65b2001-12-06 20:29:32 +0000105 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000106 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000107
108 assert(str != NULL);
109 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000110 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000111 PyErr_SetString(PyExc_OverflowError,
112 "string is too long for a Python string");
113 return NULL;
114 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 if (size == 0 && (op = nullstring) != NULL) {
116#ifdef COUNT_ALLOCS
117 null_strings++;
118#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000119 Py_INCREF(op);
120 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
123#ifdef COUNT_ALLOCS
124 one_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000130 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000131 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000136 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000137 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000138 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000146 PyObject *t = (PyObject *)op;
147 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000148 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
165 memcpy(count, vargs, sizeof(va_list));
166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
177 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
178 ;
179
Tim Peters8931ff12006-05-13 23:28:20 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 }
254 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
255 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000275 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 memcpy(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
355
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 const char *encoding,
359 const char *errors)
360{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361 PyObject *v, *str;
362
363 str = PyString_FromStringAndSize(s, size);
364 if (str == NULL)
365 return NULL;
366 v = PyString_AsDecodedString(str, encoding, errors);
367 Py_DECREF(str);
368 return v;
369}
370
371PyObject *PyString_AsDecodedObject(PyObject *str,
372 const char *encoding,
373 const char *errors)
374{
375 PyObject *v;
376
377 if (!PyString_Check(str)) {
378 PyErr_BadArgument();
379 goto onError;
380 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382 if (encoding == NULL) {
383#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385#else
386 PyErr_SetString(PyExc_ValueError, "no encoding specified");
387 goto onError;
388#endif
389 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000390
391 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 v = PyCodec_Decode(str, encoding, errors);
393 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000394 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395
396 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000397
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 return NULL;
400}
401
402PyObject *PyString_AsDecodedString(PyObject *str,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v;
407
408 v = PyString_AsDecodedObject(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000412#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000413 /* Convert Unicode to a string using the default encoding */
414 if (PyUnicode_Check(v)) {
415 PyObject *temp = v;
416 v = PyUnicode_AsEncodedString(v, NULL, NULL);
417 Py_DECREF(temp);
418 if (v == NULL)
419 goto onError;
420 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000421#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000422 if (!PyString_Check(v)) {
423 PyErr_Format(PyExc_TypeError,
424 "decoder did not return a string object (type=%.400s)",
425 v->ob_type->tp_name);
426 Py_DECREF(v);
427 goto onError;
428 }
429
430 return v;
431
432 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 return NULL;
434}
435
436PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000437 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 const char *encoding,
439 const char *errors)
440{
441 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000442
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 str = PyString_FromStringAndSize(s, size);
444 if (str == NULL)
445 return NULL;
446 v = PyString_AsEncodedString(str, encoding, errors);
447 Py_DECREF(str);
448 return v;
449}
450
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452 const char *encoding,
453 const char *errors)
454{
455 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000456
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 if (!PyString_Check(str)) {
458 PyErr_BadArgument();
459 goto onError;
460 }
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462 if (encoding == NULL) {
463#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000464 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465#else
466 PyErr_SetString(PyExc_ValueError, "no encoding specified");
467 goto onError;
468#endif
469 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470
471 /* Encode via the codec registry */
472 v = PyCodec_Encode(str, encoding, errors);
473 if (v == NULL)
474 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
476 return v;
477
478 onError:
479 return NULL;
480}
481
482PyObject *PyString_AsEncodedString(PyObject *str,
483 const char *encoding,
484 const char *errors)
485{
486 PyObject *v;
487
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000488 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000489 if (v == NULL)
490 goto onError;
491
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000492#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 /* Convert Unicode to a string using the default encoding */
494 if (PyUnicode_Check(v)) {
495 PyObject *temp = v;
496 v = PyUnicode_AsEncodedString(v, NULL, NULL);
497 Py_DECREF(temp);
498 if (v == NULL)
499 goto onError;
500 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000501#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000502 if (!PyString_Check(v)) {
503 PyErr_Format(PyExc_TypeError,
504 "encoder did not return a string object (type=%.400s)",
505 v->ob_type->tp_name);
506 Py_DECREF(v);
507 goto onError;
508 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000511
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000512 onError:
513 return NULL;
514}
515
Guido van Rossum234f9421993-06-17 12:35:49 +0000516static void
Fred Drakeba096332000-07-09 07:04:36 +0000517string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000518{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000519 switch (PyString_CHECK_INTERNED(op)) {
520 case SSTATE_NOT_INTERNED:
521 break;
522
523 case SSTATE_INTERNED_MORTAL:
524 /* revive dead object temporarily for DelItem */
525 op->ob_refcnt = 3;
526 if (PyDict_DelItem(interned, op) != 0)
527 Py_FatalError(
528 "deletion of interned string failed");
529 break;
530
531 case SSTATE_INTERNED_IMMORTAL:
532 Py_FatalError("Immortal interned string died.");
533
534 default:
535 Py_FatalError("Inconsistent interned string state.");
536 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000537 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000538}
539
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540/* Unescape a backslash-escaped string. If unicode is non-zero,
541 the string is a u-literal. If recode_encoding is non-zero,
542 the string is UTF-8 encoded and should be re-encoded in the
543 specified encoding. */
544
545PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000548 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 const char *recode_encoding)
550{
551 int c;
552 char *p, *buf;
553 const char *end;
554 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000556 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000557 if (v == NULL)
558 return NULL;
559 p = buf = PyString_AsString(v);
560 end = s + len;
561 while (s < end) {
562 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000563 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000564#ifdef Py_USING_UNICODE
565 if (recode_encoding && (*s & 0x80)) {
566 PyObject *u, *w;
567 char *r;
568 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000570 t = s;
571 /* Decode non-ASCII bytes as UTF-8. */
572 while (t < end && (*t & 0x80)) t++;
573 u = PyUnicode_DecodeUTF8(s, t - s, errors);
574 if(!u) goto failed;
575
576 /* Recode them in target encoding. */
577 w = PyUnicode_AsEncodedString(
578 u, recode_encoding, errors);
579 Py_DECREF(u);
580 if (!w) goto failed;
581
582 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000583 assert(PyString_Check(w));
584 r = PyString_AS_STRING(w);
585 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 memcpy(p, r, rn);
587 p += rn;
588 Py_DECREF(w);
589 s = t;
590 } else {
591 *p++ = *s++;
592 }
593#else
594 *p++ = *s++;
595#endif
596 continue;
597 }
598 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000599 if (s==end) {
600 PyErr_SetString(PyExc_ValueError,
601 "Trailing \\ in string");
602 goto failed;
603 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000604 switch (*s++) {
605 /* XXX This assumes ASCII! */
606 case '\n': break;
607 case '\\': *p++ = '\\'; break;
608 case '\'': *p++ = '\''; break;
609 case '\"': *p++ = '\"'; break;
610 case 'b': *p++ = '\b'; break;
611 case 'f': *p++ = '\014'; break; /* FF */
612 case 't': *p++ = '\t'; break;
613 case 'n': *p++ = '\n'; break;
614 case 'r': *p++ = '\r'; break;
615 case 'v': *p++ = '\013'; break; /* VT */
616 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
617 case '0': case '1': case '2': case '3':
618 case '4': case '5': case '6': case '7':
619 c = s[-1] - '0';
620 if ('0' <= *s && *s <= '7') {
621 c = (c<<3) + *s++ - '0';
622 if ('0' <= *s && *s <= '7')
623 c = (c<<3) + *s++ - '0';
624 }
625 *p++ = c;
626 break;
627 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000628 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 && isxdigit(Py_CHARMASK(s[1]))) {
630 unsigned int x = 0;
631 c = Py_CHARMASK(*s);
632 s++;
633 if (isdigit(c))
634 x = c - '0';
635 else if (islower(c))
636 x = 10 + c - 'a';
637 else
638 x = 10 + c - 'A';
639 x = x << 4;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x += c - '0';
644 else if (islower(c))
645 x += 10 + c - 'a';
646 else
647 x += 10 + c - 'A';
648 *p++ = x;
649 break;
650 }
651 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656 if (strcmp(errors, "replace") == 0) {
657 *p++ = '?';
658 } else if (strcmp(errors, "ignore") == 0)
659 /* do nothing */;
660 else {
661 PyErr_Format(PyExc_ValueError,
662 "decoding error; "
663 "unknown error handling code: %.400s",
664 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667#ifndef Py_USING_UNICODE
668 case 'u':
669 case 'U':
670 case 'N':
671 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000672 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 "Unicode escapes not legal "
674 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000675 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677#endif
678 default:
679 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000680 s--;
681 goto non_esc; /* an arbitry number of unescaped
682 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000685 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 return v;
688 failed:
689 Py_DECREF(v);
690 return NULL;
691}
692
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694string_getsize(register PyObject *op)
695{
696 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (PyString_AsStringAndSize(op, &s, &len))
699 return -1;
700 return len;
701}
702
703static /*const*/ char *
704string_getbuffer(register PyObject *op)
705{
706 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000707 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000708 if (PyString_AsStringAndSize(op, &s, &len))
709 return NULL;
710 return s;
711}
712
Martin v. Löwis18e16552006-02-15 17:27:45 +0000713Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000714PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (!PyString_Check(op))
717 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000718 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719}
720
721/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000722PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 if (!PyString_Check(op))
725 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000726 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727}
728
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729int
730PyString_AsStringAndSize(register PyObject *obj,
731 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000732 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733{
734 if (s == NULL) {
735 PyErr_BadInternalCall();
736 return -1;
737 }
738
739 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000740#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 if (PyUnicode_Check(obj)) {
742 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
743 if (obj == NULL)
744 return -1;
745 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000746 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000747#endif
748 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000749 PyErr_Format(PyExc_TypeError,
750 "expected string or Unicode object, "
751 "%.200s found", obj->ob_type->tp_name);
752 return -1;
753 }
754 }
755
756 *s = PyString_AS_STRING(obj);
757 if (len != NULL)
758 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000759 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_SetString(PyExc_TypeError,
761 "expected string without null bytes");
762 return -1;
763 }
764 return 0;
765}
766
Fredrik Lundhaf722372006-05-25 17:55:31 +0000767/* -------------------------------------------------------------------- */
Fredrik Lundha50d2012006-05-26 17:04:58 +0000768/* stringlib components */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769
/* Configuration macros, presumably consumed by the stringlib headers
   included just below -- TODO confirm against stringlib/*.h. */

/* element type of the sequences being processed */
#define STRINGLIB_CHAR char

/* constructor used to build result objects from (buffer, length) */
#define STRINGLIB_NEW PyString_FromStringAndSize
/* comparison routine for runs of STRINGLIB_CHAR */
#define STRINGLIB_CMP memcmp

/* object used as the empty string: this file's cached `nullstring' */
#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000776
Fredrik Lundha50d2012006-05-26 17:04:58 +0000777#include "stringlib/fastsearch.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000778#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000779
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000781/* Methods */
782
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000783static int
Fred Drakeba096332000-07-09 07:04:36 +0000784string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000785{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000786 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000788 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000789
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000791 if (! PyString_CheckExact(op)) {
792 int ret;
793 /* A str subclass may have its own __str__ method. */
794 op = (PyStringObject *) PyObject_Str((PyObject *)op);
795 if (op == NULL)
796 return -1;
797 ret = string_print(op, fp, flags);
798 Py_DECREF(op);
799 return ret;
800 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000801 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000802#ifdef __VMS
803 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
804#else
805 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
806#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000807 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000809
Thomas Wouters7e474022000-07-16 12:04:32 +0000810 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000811 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000812 if (memchr(op->ob_sval, '\'', op->ob_size) &&
813 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814 quote = '"';
815
816 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817 for (i = 0; i < op->ob_size; i++) {
818 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000820 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000821 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000822 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000823 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000824 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000825 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 fprintf(fp, "\\r");
827 else if (c < ' ' || c >= 0x7f)
828 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000829 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000830 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000833 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834}
835
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000836PyObject *
837PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000839 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000840 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000841 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000842 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000843 PyErr_SetString(PyExc_OverflowError,
844 "string is too large to make repr");
845 }
846 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000848 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 }
850 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000851 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 register char c;
853 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000854 int quote;
855
Thomas Wouters7e474022000-07-16 12:04:32 +0000856 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000857 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000858 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000859 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000860 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000861 quote = '"';
862
Tim Peters9161c8b2001-12-03 01:55:38 +0000863 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000864 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000866 /* There's at least enough room for a hex escape
867 and a closing quote. */
868 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000870 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000872 else if (c == '\t')
873 *p++ = '\\', *p++ = 't';
874 else if (c == '\n')
875 *p++ = '\\', *p++ = 'n';
876 else if (c == '\r')
877 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000878 else if (c < ' ' || c >= 0x7f) {
879 /* For performance, we don't want to call
880 PyOS_snprintf here (extra layers of
881 function call). */
882 sprintf(p, "\\x%02x", c & 0xff);
883 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000884 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000885 else
886 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000888 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000889 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000891 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000892 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000893 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000894 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895}
896
Guido van Rossum189f1df2001-05-01 16:51:53 +0000897static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000898string_repr(PyObject *op)
899{
900 return PyString_Repr(op, 1);
901}
902
903static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000904string_str(PyObject *s)
905{
Tim Petersc9933152001-10-16 20:18:24 +0000906 assert(PyString_Check(s));
907 if (PyString_CheckExact(s)) {
908 Py_INCREF(s);
909 return s;
910 }
911 else {
912 /* Subtype -- return genuine string with the same value. */
913 PyStringObject *t = (PyStringObject *) s;
914 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
915 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000916}
917
Martin v. Löwis18e16552006-02-15 17:27:45 +0000918static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000919string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920{
921 return a->ob_size;
922}
923
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000924static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000925string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000926{
Andrew Dalke598710c2006-05-25 18:18:39 +0000927 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 register PyStringObject *op;
929 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000930#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000931 if (PyUnicode_Check(bb))
932 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000933#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000934 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000935 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000936 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937 return NULL;
938 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000939#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000941 if ((a->ob_size == 0 || b->ob_size == 0) &&
942 PyString_CheckExact(a) && PyString_CheckExact(b)) {
943 if (a->ob_size == 0) {
944 Py_INCREF(bb);
945 return bb;
946 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947 Py_INCREF(a);
948 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949 }
950 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000951 if (size < 0) {
952 PyErr_SetString(PyExc_OverflowError,
953 "strings are too large to concat");
954 return NULL;
955 }
956
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000957 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000958 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000959 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000960 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000961 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000962 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000963 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000964 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
965 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000966 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968#undef b
969}
970
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000971static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000972string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000974 register Py_ssize_t i;
975 register Py_ssize_t j;
976 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000978 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000979 if (n < 0)
980 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000981 /* watch out for overflows: the size can overflow int,
982 * and the # of bytes needed can overflow size_t
983 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000985 if (n && size / n != a->ob_size) {
986 PyErr_SetString(PyExc_OverflowError,
987 "repeated string is too long");
988 return NULL;
989 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000990 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000991 Py_INCREF(a);
992 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993 }
Tim Peterse7c05322004-06-27 17:24:49 +0000994 nbytes = (size_t)size;
995 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000996 PyErr_SetString(PyExc_OverflowError,
997 "repeated string is too long");
998 return NULL;
999 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001000 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001001 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001002 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001003 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001004 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001005 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001006 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001007 op->ob_sval[size] = '\0';
1008 if (a->ob_size == 1 && n > 0) {
1009 memset(op->ob_sval, a->ob_sval[0] , n);
1010 return (PyObject *) op;
1011 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001012 i = 0;
1013 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001014 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1015 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001016 }
1017 while (i < size) {
1018 j = (i <= size-i) ? i : size-i;
1019 memcpy(op->ob_sval+i, op->ob_sval, j);
1020 i += j;
1021 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001022 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001023}
1024
1025/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1026
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001028string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001029 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001030 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001031{
1032 if (i < 0)
1033 i = 0;
1034 if (j < 0)
1035 j = 0; /* Avoid signed/unsigned bug in next line */
1036 if (j > a->ob_size)
1037 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001038 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1039 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001040 Py_INCREF(a);
1041 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001042 }
1043 if (j < i)
1044 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001045 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001046}
1047
Guido van Rossum9284a572000-03-07 15:53:43 +00001048static int
Fred Drakeba096332000-07-09 07:04:36 +00001049string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001050{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001051 char *s = PyString_AS_STRING(a);
1052 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001053 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001054 Py_ssize_t pos;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001055
1056 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001057#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001058 if (PyUnicode_Check(el))
1059 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001060#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001061 if (!PyString_Check(el)) {
1062 PyErr_SetString(PyExc_TypeError,
1063 "'in <string>' requires string as left operand");
1064 return -1;
1065 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001066 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001067
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001068 if (len_sub == 0)
1069 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001070
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001071 pos = fastsearch(
1072 s, PyString_GET_SIZE(a),
1073 sub, len_sub, FAST_SEARCH
1074 );
Fredrik Lundh3a65d872006-05-26 17:31:41 +00001075
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001076 return (pos != -1);
Guido van Rossum9284a572000-03-07 15:53:43 +00001077}
1078
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001079static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001080string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001081{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001082 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001083 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001084 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001085 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001086 return NULL;
1087 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001088 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001089 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001090 if (v == NULL)
1091 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001092 else {
1093#ifdef COUNT_ALLOCS
1094 one_strings++;
1095#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001096 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001097 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001098 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001099}
1100
Martin v. Löwiscd353062001-05-24 16:56:35 +00001101static PyObject*
1102string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001103{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001104 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001105 Py_ssize_t len_a, len_b;
1106 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001107 PyObject *result;
1108
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001109 /* Make sure both arguments are strings. */
1110 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001111 result = Py_NotImplemented;
1112 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001113 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001114 if (a == b) {
1115 switch (op) {
1116 case Py_EQ:case Py_LE:case Py_GE:
1117 result = Py_True;
1118 goto out;
1119 case Py_NE:case Py_LT:case Py_GT:
1120 result = Py_False;
1121 goto out;
1122 }
1123 }
1124 if (op == Py_EQ) {
1125 /* Supporting Py_NE here as well does not save
1126 much time, since Py_NE is rarely used. */
1127 if (a->ob_size == b->ob_size
1128 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001129 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001130 a->ob_size) == 0)) {
1131 result = Py_True;
1132 } else {
1133 result = Py_False;
1134 }
1135 goto out;
1136 }
1137 len_a = a->ob_size; len_b = b->ob_size;
1138 min_len = (len_a < len_b) ? len_a : len_b;
1139 if (min_len > 0) {
1140 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1141 if (c==0)
1142 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1143 }else
1144 c = 0;
1145 if (c == 0)
1146 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1147 switch (op) {
1148 case Py_LT: c = c < 0; break;
1149 case Py_LE: c = c <= 0; break;
1150 case Py_EQ: assert(0); break; /* unreachable */
1151 case Py_NE: c = c != 0; break;
1152 case Py_GT: c = c > 0; break;
1153 case Py_GE: c = c >= 0; break;
1154 default:
1155 result = Py_NotImplemented;
1156 goto out;
1157 }
1158 result = c ? Py_True : Py_False;
1159 out:
1160 Py_INCREF(result);
1161 return result;
1162}
1163
1164int
1165_PyString_Eq(PyObject *o1, PyObject *o2)
1166{
1167 PyStringObject *a, *b;
1168 a = (PyStringObject*)o1;
1169 b = (PyStringObject*)o2;
1170 return a->ob_size == b->ob_size
1171 && *a->ob_sval == *b->ob_sval
1172 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001173}
1174
Guido van Rossum9bfef441993-03-29 10:43:31 +00001175static long
Fred Drakeba096332000-07-09 07:04:36 +00001176string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001177{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001178 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001179 register unsigned char *p;
1180 register long x;
1181
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001182 if (a->ob_shash != -1)
1183 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001184 len = a->ob_size;
1185 p = (unsigned char *) a->ob_sval;
1186 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001187 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001188 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001189 x ^= a->ob_size;
1190 if (x == -1)
1191 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001192 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001193 return x;
1194}
1195
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001196#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1197
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001198static PyObject*
1199string_subscript(PyStringObject* self, PyObject* item)
1200{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001201 PyNumberMethods *nb = item->ob_type->tp_as_number;
1202 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1203 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204 if (i == -1 && PyErr_Occurred())
1205 return NULL;
1206 if (i < 0)
1207 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001208 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 }
1210 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001211 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001212 char* source_buf;
1213 char* result_buf;
1214 PyObject* result;
1215
Tim Petersae1d0c92006-03-17 03:29:34 +00001216 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217 PyString_GET_SIZE(self),
1218 &start, &stop, &step, &slicelength) < 0) {
1219 return NULL;
1220 }
1221
1222 if (slicelength <= 0) {
1223 return PyString_FromStringAndSize("", 0);
1224 }
1225 else {
1226 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001227 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001228 if (result_buf == NULL)
1229 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230
Tim Petersae1d0c92006-03-17 03:29:34 +00001231 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232 cur += step, i++) {
1233 result_buf[i] = source_buf[cur];
1234 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001235
1236 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001237 slicelength);
1238 PyMem_Free(result_buf);
1239 return result;
1240 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001241 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001242 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001243 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 "string indices must be integers");
1245 return NULL;
1246 }
1247}
1248
Martin v. Löwis18e16552006-02-15 17:27:45 +00001249static Py_ssize_t
1250string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001251{
1252 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001253 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001254 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001255 return -1;
1256 }
1257 *ptr = (void *)self->ob_sval;
1258 return self->ob_size;
1259}
1260
Martin v. Löwis18e16552006-02-15 17:27:45 +00001261static Py_ssize_t
1262string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263{
Guido van Rossum045e6881997-09-08 18:30:11 +00001264 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001265 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001266 return -1;
1267}
1268
Martin v. Löwis18e16552006-02-15 17:27:45 +00001269static Py_ssize_t
1270string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001271{
1272 if ( lenp )
1273 *lenp = self->ob_size;
1274 return 1;
1275}
1276
Martin v. Löwis18e16552006-02-15 17:27:45 +00001277static Py_ssize_t
1278string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001279{
1280 if ( index != 0 ) {
1281 PyErr_SetString(PyExc_SystemError,
1282 "accessing non-existent string segment");
1283 return -1;
1284 }
1285 *ptr = self->ob_sval;
1286 return self->ob_size;
1287}
1288
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001289static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001290 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001291 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292 (ssizeargfunc)string_repeat, /*sq_repeat*/
1293 (ssizeargfunc)string_item, /*sq_item*/
1294 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001295 0, /*sq_ass_item*/
1296 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001297 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001298};
1299
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001300static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001301 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001302 (binaryfunc)string_subscript,
1303 0,
1304};
1305
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001306static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001307 (readbufferproc)string_buffer_getreadbuf,
1308 (writebufferproc)string_buffer_getwritebuf,
1309 (segcountproc)string_buffer_getsegcount,
1310 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001311};
1312
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001313
1314
/* Which end(s) of the string a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name: the format string with its leading "|O:" skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001323
Andrew Dalke525eab32006-05-26 14:00:45 +00001324
/* True iff the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly before memcmp() handles the interior.
   Don't call if length < 2 (the interior length would be negative).
   Every argument is parenthesized so that expression arguments (for
   example a conditional offset) expand correctly; arguments are still
   evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1330
1331
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit` splits:
   maxsplit+1 (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so expression arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1344
/* The SPLIT_* helpers below are not self-contained: they expand to code
   that uses locals of the calling function -- `str` (scratch PyObject *),
   `list` (the result list) and, for SPLIT_ADD / FIX_PREALLOC_SIZE, `count`
   (number of items stored so far) -- and they jump to a label named
   `onError` on failure. */

/* Append data[left:right] to `list` as a new string. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as item number `count` of `list`: the first
   MAX_PREALLOC items go straight into the preallocated slots, later ones
   are appended.  Increments `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count;

/* Cursor helpers: move index `i` (which must be an lvalue) across
   whitespace / non-whitespace bytes of `s`, forwards up to `len` or
   backwards down to -1. */
#define SKIP_SPACE(s, i, len)    { while (i<(len) && isspace(Py_CHARMASK((s)[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<(len) && !isspace(Py_CHARMASK((s)[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK((s)[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK((s)[i]))) i--; }
1381
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001382Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001383split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001384{
Andrew Dalke525eab32006-05-26 14:00:45 +00001385 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001386 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001387 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001388
1389 if (list == NULL)
1390 return NULL;
1391
Andrew Dalke02758d62006-05-26 15:21:01 +00001392 i = j = 0;
1393
1394 while (maxsplit-- > 0) {
1395 SKIP_SPACE(s, i, len);
1396 if (i==len) break;
1397 j = i; i++;
1398 SKIP_NONSPACE(s, i, len);
1399 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001401
1402 if (i < len) {
1403 /* Only occurs when maxsplit was reached */
1404 /* Skip any remaining whitespace and copy to end of string */
1405 SKIP_SPACE(s, i, len);
1406 if (i != len)
1407 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001408 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001409 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001410 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001411 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412 Py_DECREF(list);
1413 return NULL;
1414}
1415
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001416Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001417split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001418{
Andrew Dalke525eab32006-05-26 14:00:45 +00001419 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001420 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001421 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001422
1423 if (list == NULL)
1424 return NULL;
1425
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001426 i = j = 0;
1427 while ((j < len) && (maxcount-- > 0)) {
1428 for(; j<len; j++) {
1429 /* I found that using memchr makes no difference */
1430 if (s[j] == ch) {
1431 SPLIT_ADD(s, i, j);
1432 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001433 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001434 }
1435 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001437 if (i <= len) {
1438 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001439 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001440 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001441 return list;
1442
1443 onError:
1444 Py_DECREF(list);
1445 return NULL;
1446}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001448PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449"S.split([sep [,maxsplit]]) -> list of strings\n\
1450\n\
1451Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001452delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001453splits are done. If sep is not specified or is None, any\n\
1454whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455
1456static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001457string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001459 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001460 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001461 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001463#ifdef USE_FAST
1464 Py_ssize_t pos;
1465#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001466
Martin v. Löwis9c830762006-04-13 08:37:17 +00001467 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001469 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001470 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001471 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001472 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001473 if (PyString_Check(subobj)) {
1474 sub = PyString_AS_STRING(subobj);
1475 n = PyString_GET_SIZE(subobj);
1476 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001477#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001478 else if (PyUnicode_Check(subobj))
1479 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001480#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1482 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001483
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 if (n == 0) {
1485 PyErr_SetString(PyExc_ValueError, "empty separator");
1486 return NULL;
1487 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001488 else if (n == 1)
1489 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490
Andrew Dalke525eab32006-05-26 14:00:45 +00001491 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492 if (list == NULL)
1493 return NULL;
1494
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001495#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001497 while (maxsplit-- > 0) {
1498 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1499 if (pos < 0)
1500 break;
1501 j = i+pos;
1502 SPLIT_ADD(s, i, j);
1503 i = j + n;
1504
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001505 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001506#else
1507 i = j = 0;
1508 while ((j+n <= len) && (maxsplit-- > 0)) {
1509 for (; j+n <= len; j++) {
1510 if (Py_STRING_MATCH(s, j, sub, n)) {
1511 SPLIT_ADD(s, i, j);
1512 i = j = j + n;
1513 break;
1514 }
1515 }
1516 }
1517#endif
1518 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001519 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520 return list;
1521
Andrew Dalke525eab32006-05-26 14:00:45 +00001522 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001523 Py_DECREF(list);
1524 return NULL;
1525}
1526
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001527PyDoc_STRVAR(partition__doc__,
1528"S.partition(sep) -> (head, sep, tail)\n\
1529\n\
1530Searches for the separator sep in S, and returns the part before it,\n\
1531the separator itself, and the part after it. If the separator is not\n\
1532found, returns S and two empty strings.");
1533
1534static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001535string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001536{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001537 const char *sep;
1538 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001539
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001540 if (PyString_Check(sep_obj)) {
1541 sep = PyString_AS_STRING(sep_obj);
1542 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001543 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001544#ifdef Py_USING_UNICODE
1545 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001546 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001547#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001548 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549 return NULL;
1550
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001551 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001552 (PyObject*) self,
1553 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1554 sep_obj, sep, sep_len
1555 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001556}
1557
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001558PyDoc_STRVAR(rpartition__doc__,
1559"S.rpartition(sep) -> (head, sep, tail)\n\
1560\n\
1561Searches for the separator sep in S, starting at the end of S, and returns\n\
1562the part before it, the separator itself, and the part after it. If the\n\
1563separator is not found, returns S and two empty strings.");
1564
1565static PyObject *
1566string_rpartition(PyStringObject *self, PyObject *sep_obj)
1567{
1568 const char *sep;
1569 Py_ssize_t sep_len;
1570
1571 if (PyString_Check(sep_obj)) {
1572 sep = PyString_AS_STRING(sep_obj);
1573 sep_len = PyString_GET_SIZE(sep_obj);
1574 }
1575#ifdef Py_USING_UNICODE
1576 else if (PyUnicode_Check(sep_obj))
1577 return PyUnicode_Partition((PyObject *) self, sep_obj);
1578#endif
1579 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1580 return NULL;
1581
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001582 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001583 (PyObject*) self,
1584 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1585 sep_obj, sep, sep_len
1586 );
1587}
1588
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001589Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001590rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001591{
Andrew Dalke525eab32006-05-26 14:00:45 +00001592 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001593 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001594 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001595
1596 if (list == NULL)
1597 return NULL;
1598
Andrew Dalke02758d62006-05-26 15:21:01 +00001599 i = j = len-1;
1600
1601 while (maxsplit-- > 0) {
1602 RSKIP_SPACE(s, i);
1603 if (i<0) break;
1604 j = i; i--;
1605 RSKIP_NONSPACE(s, i);
1606 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001607 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001608 if (i >= 0) {
1609 /* Only occurs when maxsplit was reached */
1610 /* Skip any remaining whitespace and copy to beginning of string */
1611 RSKIP_SPACE(s, i);
1612 if (i >= 0)
1613 SPLIT_ADD(s, 0, i + 1);
1614
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001615 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001616 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001617 if (PyList_Reverse(list) < 0)
1618 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001619 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001620 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 Py_DECREF(list);
1622 return NULL;
1623}
1624
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001625Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001626rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001627{
Andrew Dalke525eab32006-05-26 14:00:45 +00001628 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001630 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631
1632 if (list == NULL)
1633 return NULL;
1634
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001635 i = j = len - 1;
1636 while ((i >= 0) && (maxcount-- > 0)) {
1637 for (; i >= 0; i--) {
1638 if (s[i] == ch) {
1639 SPLIT_ADD(s, i + 1, j + 1);
1640 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001641 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001642 }
1643 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001644 }
1645 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001646 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001647 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001648 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001649 if (PyList_Reverse(list) < 0)
1650 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001651 return list;
1652
1653 onError:
1654 Py_DECREF(list);
1655 return NULL;
1656}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001657
1658PyDoc_STRVAR(rsplit__doc__,
1659"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1660\n\
1661Return a list of the words in the string S, using sep as the\n\
1662delimiter string, starting at the end of the string and working\n\
1663to the front. If maxsplit is given, at most maxsplit splits are\n\
1664done. If sep is not specified or is None, any whitespace string\n\
1665is a separator.");
1666
1667static PyObject *
1668string_rsplit(PyStringObject *self, PyObject *args)
1669{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001670 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001671 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001672 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001673 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674
Martin v. Löwis9c830762006-04-13 08:37:17 +00001675 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676 return NULL;
1677 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001678 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679 if (subobj == Py_None)
1680 return rsplit_whitespace(s, len, maxsplit);
1681 if (PyString_Check(subobj)) {
1682 sub = PyString_AS_STRING(subobj);
1683 n = PyString_GET_SIZE(subobj);
1684 }
1685#ifdef Py_USING_UNICODE
1686 else if (PyUnicode_Check(subobj))
1687 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1688#endif
1689 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1690 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001691
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001692 if (n == 0) {
1693 PyErr_SetString(PyExc_ValueError, "empty separator");
1694 return NULL;
1695 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001696 else if (n == 1)
1697 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001698
Andrew Dalke525eab32006-05-26 14:00:45 +00001699 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700 if (list == NULL)
1701 return NULL;
1702
1703 j = len;
1704 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001705
1706 while ( (i >= 0) && (maxsplit-- > 0) ) {
1707 for (; i>=0; i--) {
1708 if (Py_STRING_MATCH(s, i, sub, n)) {
1709 SPLIT_ADD(s, i + n, j);
1710 j = i;
1711 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001712 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001713 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001715 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001716 SPLIT_ADD(s, 0, j);
1717 FIX_PREALLOC_SIZE(list);
1718 if (PyList_Reverse(list) < 0)
1719 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001720 return list;
1721
Andrew Dalke525eab32006-05-26 14:00:45 +00001722onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001723 Py_DECREF(list);
1724 return NULL;
1725}
1726
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001728PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729"S.join(sequence) -> string\n\
1730\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001732sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733
1734static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001735string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736{
1737 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001738 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001739 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001741 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001742 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001744 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745
Tim Peters19fe14e2001-01-19 03:03:47 +00001746 seq = PySequence_Fast(orig, "");
1747 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001748 return NULL;
1749 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001750
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001751 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001752 if (seqlen == 0) {
1753 Py_DECREF(seq);
1754 return PyString_FromString("");
1755 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001757 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001758 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1759 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001760 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001761 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001764
Raymond Hettinger674f2412004-08-23 23:23:54 +00001765 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001766 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001767 * Do a pre-pass to figure out the total amount of space we'll
1768 * need (sz), see whether any argument is absurd, and defer to
1769 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001770 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001771 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001772 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 item = PySequence_Fast_GET_ITEM(seq, i);
1774 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001775#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001776 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001777 /* Defer to Unicode join.
1778 * CAUTION: There's no gurantee that the
1779 * original sequence can be iterated over
1780 * again, so we must pass seq here.
1781 */
1782 PyObject *result;
1783 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001784 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001785 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001786 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001787#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001788 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001789 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001790 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001791 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001792 Py_DECREF(seq);
1793 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001794 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001795 sz += PyString_GET_SIZE(item);
1796 if (i != 0)
1797 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001798 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001799 PyErr_SetString(PyExc_OverflowError,
1800 "join() is too long for a Python string");
1801 Py_DECREF(seq);
1802 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001804 }
1805
1806 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001807 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001808 if (res == NULL) {
1809 Py_DECREF(seq);
1810 return NULL;
1811 }
1812
1813 /* Catenate everything. */
1814 p = PyString_AS_STRING(res);
1815 for (i = 0; i < seqlen; ++i) {
1816 size_t n;
1817 item = PySequence_Fast_GET_ITEM(seq, i);
1818 n = PyString_GET_SIZE(item);
1819 memcpy(p, PyString_AS_STRING(item), n);
1820 p += n;
1821 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001822 memcpy(p, sep, seplen);
1823 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001824 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001826
Jeremy Hylton49048292000-07-11 03:28:17 +00001827 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829}
1830
Tim Peters52e155e2001-06-16 05:42:57 +00001831PyObject *
1832_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001833{
Tim Petersa7259592001-06-16 05:11:17 +00001834 assert(sep != NULL && PyString_Check(sep));
1835 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001836 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001837}
1838
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001839Py_LOCAL(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001840string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001841{
1842 if (*end > len)
1843 *end = len;
1844 else if (*end < 0)
1845 *end += len;
1846 if (*end < 0)
1847 *end = 0;
1848 if (*start < 0)
1849 *start += len;
1850 if (*start < 0)
1851 *start = 0;
1852}
1853
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001854Py_LOCAL(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001855string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001857 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001858 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001859 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001860 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861
Martin v. Löwis18e16552006-02-15 17:27:45 +00001862 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001863 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001864 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865 return -2;
1866 if (PyString_Check(subobj)) {
1867 sub = PyString_AS_STRING(subobj);
1868 n = PyString_GET_SIZE(subobj);
1869 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001870#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001872 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001873#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875 return -2;
1876
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001877 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001879 if (n == 0)
1880 return (dir > 0) ? i : last;
1881 if (dir > 0) {
1882 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1883 FAST_SEARCH);
1884 if (pos < 0)
1885 return pos;
1886 return pos + i;
Fredrik Lundh3a65d872006-05-26 17:31:41 +00001887 } else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001888 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001889
Guido van Rossum4c08d552000-03-10 22:55:18 +00001890 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001891 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001892 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001893 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001894 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001895 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001896
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897 return -1;
1898}
1899
1900
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001901PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001902"S.find(sub [,start [,end]]) -> int\n\
1903\n\
1904Return the lowest index in S where substring sub is found,\n\
1905such that sub is contained within s[start,end]. Optional\n\
1906arguments start and end are interpreted as in slice notation.\n\
1907\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001908Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909
1910static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001911string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001913 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914 if (result == -2)
1915 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001916 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917}
1918
1919
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001920PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921"S.index(sub [,start [,end]]) -> int\n\
1922\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001923Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924
1925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001926string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001928 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929 if (result == -2)
1930 return NULL;
1931 if (result == -1) {
1932 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001933 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934 return NULL;
1935 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001936 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937}
1938
1939
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001940PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941"S.rfind(sub [,start [,end]]) -> int\n\
1942\n\
1943Return the highest index in S where substring sub is found,\n\
1944such that sub is contained within s[start,end]. Optional\n\
1945arguments start and end are interpreted as in slice notation.\n\
1946\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001947Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948
1949static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001950string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001952 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953 if (result == -2)
1954 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001955 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956}
1957
1958
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001959PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960"S.rindex(sub [,start [,end]]) -> int\n\
1961\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001962Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963
1964static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001965string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001967 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968 if (result == -2)
1969 return NULL;
1970 if (result == -1) {
1971 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001972 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973 return NULL;
1974 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001975 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976}
1977
1978
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001979Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001980do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1981{
1982 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001983 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001984 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001985 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1986 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001987
1988 i = 0;
1989 if (striptype != RIGHTSTRIP) {
1990 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1991 i++;
1992 }
1993 }
1994
1995 j = len;
1996 if (striptype != LEFTSTRIP) {
1997 do {
1998 j--;
1999 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2000 j++;
2001 }
2002
2003 if (i == 0 && j == len && PyString_CheckExact(self)) {
2004 Py_INCREF(self);
2005 return (PyObject*)self;
2006 }
2007 else
2008 return PyString_FromStringAndSize(s+i, j-i);
2009}
2010
2011
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002012Py_LOCAL(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002013do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014{
2015 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002016 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002017
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018 i = 0;
2019 if (striptype != RIGHTSTRIP) {
2020 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2021 i++;
2022 }
2023 }
2024
2025 j = len;
2026 if (striptype != LEFTSTRIP) {
2027 do {
2028 j--;
2029 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2030 j++;
2031 }
2032
Tim Peters8fa5dd02001-09-12 02:18:30 +00002033 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034 Py_INCREF(self);
2035 return (PyObject*)self;
2036 }
2037 else
2038 return PyString_FromStringAndSize(s+i, j-i);
2039}
2040
2041
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002042Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002043do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2044{
2045 PyObject *sep = NULL;
2046
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002047 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002048 return NULL;
2049
2050 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002051 if (PyString_Check(sep))
2052 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002053#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002054 else if (PyUnicode_Check(sep)) {
2055 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2056 PyObject *res;
2057 if (uniself==NULL)
2058 return NULL;
2059 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2060 striptype, sep);
2061 Py_DECREF(uniself);
2062 return res;
2063 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002064#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002065 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002066#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002067 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002068#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002069 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002070#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002071 STRIPNAME(striptype));
2072 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002073 }
2074
2075 return do_strip(self, striptype);
2076}
2077
2078
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002079PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002080"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081\n\
2082Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002083whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002084If chars is given and not None, remove characters in chars instead.\n\
2085If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086
2087static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002088string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002090 if (PyTuple_GET_SIZE(args) == 0)
2091 return do_strip(self, BOTHSTRIP); /* Common case */
2092 else
2093 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094}
2095
2096
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002097PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002098"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002100Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002101If chars is given and not None, remove characters in chars instead.\n\
2102If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103
2104static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002105string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002107 if (PyTuple_GET_SIZE(args) == 0)
2108 return do_strip(self, LEFTSTRIP); /* Common case */
2109 else
2110 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111}
2112
2113
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002114PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002115"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002117Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002118If chars is given and not None, remove characters in chars instead.\n\
2119If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120
2121static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002122string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002124 if (PyTuple_GET_SIZE(args) == 0)
2125 return do_strip(self, RIGHTSTRIP); /* Common case */
2126 else
2127 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128}
2129
2130
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002131PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132"S.lower() -> string\n\
2133\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002134Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002136/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2137#ifndef _tolower
2138#define _tolower tolower
2139#endif
2140
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002142string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002144 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002145 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002146 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002148 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002149 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002151
2152 s = PyString_AS_STRING(newobj);
2153
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002154 memcpy(s, PyString_AS_STRING(self), n);
2155
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002157 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002158 if (isupper(c))
2159 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002161
Anthony Baxtera6286212006-04-11 07:42:36 +00002162 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163}
2164
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002165PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166"S.upper() -> string\n\
2167\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002168Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002170#ifndef _toupper
2171#define _toupper toupper
2172#endif
2173
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002175string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002177 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002178 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002179 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002181 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002182 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002184
2185 s = PyString_AS_STRING(newobj);
2186
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002187 memcpy(s, PyString_AS_STRING(self), n);
2188
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002190 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002191 if (islower(c))
2192 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002194
Anthony Baxtera6286212006-04-11 07:42:36 +00002195 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002196}
2197
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002198PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199"S.title() -> string\n\
2200\n\
2201Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002202characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203
2204static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002205string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206{
2207 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002208 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002209 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002210 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002211
Anthony Baxtera6286212006-04-11 07:42:36 +00002212 newobj = PyString_FromStringAndSize(NULL, n);
2213 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002214 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002215 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002216 for (i = 0; i < n; i++) {
2217 int c = Py_CHARMASK(*s++);
2218 if (islower(c)) {
2219 if (!previous_is_cased)
2220 c = toupper(c);
2221 previous_is_cased = 1;
2222 } else if (isupper(c)) {
2223 if (previous_is_cased)
2224 c = tolower(c);
2225 previous_is_cased = 1;
2226 } else
2227 previous_is_cased = 0;
2228 *s_new++ = c;
2229 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002230 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002231}
2232
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002233PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234"S.capitalize() -> string\n\
2235\n\
2236Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002237capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238
2239static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002240string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241{
2242 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002243 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002244 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245
Anthony Baxtera6286212006-04-11 07:42:36 +00002246 newobj = PyString_FromStringAndSize(NULL, n);
2247 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002249 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250 if (0 < n) {
2251 int c = Py_CHARMASK(*s++);
2252 if (islower(c))
2253 *s_new = toupper(c);
2254 else
2255 *s_new = c;
2256 s_new++;
2257 }
2258 for (i = 1; i < n; i++) {
2259 int c = Py_CHARMASK(*s++);
2260 if (isupper(c))
2261 *s_new = tolower(c);
2262 else
2263 *s_new = c;
2264 s_new++;
2265 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002266 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267}
2268
2269
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002270PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271"S.count(sub[, start[, end]]) -> int\n\
2272\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002273Return the number of non-overlapping occurrences of substring sub in\n\
2274string S[start:end]. Optional arguments start and end are interpreted\n\
2275as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002276
2277static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002278string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002279{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002280 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002281 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002282 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002283 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002285
Guido van Rossumc6821402000-05-08 14:08:05 +00002286 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2287 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002288 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002289
Guido van Rossum4c08d552000-03-10 22:55:18 +00002290 if (PyString_Check(subobj)) {
2291 sub = PyString_AS_STRING(subobj);
2292 n = PyString_GET_SIZE(subobj);
2293 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002294#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002295 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002296 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002297 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2298 if (count == -1)
2299 return NULL;
2300 else
2301 return PyInt_FromLong((long) count);
2302 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002303#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2305 return NULL;
2306
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002307 string_adjust_indices(&i, &last, len);
2308
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309 m = last + 1 - n;
2310 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002311 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312
Fredrik Lundhaf722372006-05-25 17:55:31 +00002313 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2314 if (r < 0)
2315 r = 0; /* no match */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002316 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317}
2318
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002319PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320"S.swapcase() -> string\n\
2321\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002322Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002323converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324
2325static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002326string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327{
2328 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002329 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002330 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331
Anthony Baxtera6286212006-04-11 07:42:36 +00002332 newobj = PyString_FromStringAndSize(NULL, n);
2333 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002335 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336 for (i = 0; i < n; i++) {
2337 int c = Py_CHARMASK(*s++);
2338 if (islower(c)) {
2339 *s_new = toupper(c);
2340 }
2341 else if (isupper(c)) {
2342 *s_new = tolower(c);
2343 }
2344 else
2345 *s_new = c;
2346 s_new++;
2347 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002348 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002349}
2350
2351
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002352PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353"S.translate(table [,deletechars]) -> string\n\
2354\n\
2355Return a copy of the string S, where all characters occurring\n\
2356in the optional argument deletechars are removed, and the\n\
2357remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002358translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002359
2360static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002361string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002363 register char *input, *output;
2364 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002365 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002368 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369 PyObject *result;
2370 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002373 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002375 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002376
2377 if (PyString_Check(tableobj)) {
2378 table1 = PyString_AS_STRING(tableobj);
2379 tablen = PyString_GET_SIZE(tableobj);
2380 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002381#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002382 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002383 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384 parameter; instead a mapping to None will cause characters
2385 to be deleted. */
2386 if (delobj != NULL) {
2387 PyErr_SetString(PyExc_TypeError,
2388 "deletions are implemented differently for unicode");
2389 return NULL;
2390 }
2391 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2392 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002393#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002394 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002395 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002396
Martin v. Löwis00b61272002-12-12 20:03:19 +00002397 if (tablen != 256) {
2398 PyErr_SetString(PyExc_ValueError,
2399 "translation table must be 256 characters long");
2400 return NULL;
2401 }
2402
Guido van Rossum4c08d552000-03-10 22:55:18 +00002403 if (delobj != NULL) {
2404 if (PyString_Check(delobj)) {
2405 del_table = PyString_AS_STRING(delobj);
2406 dellen = PyString_GET_SIZE(delobj);
2407 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002408#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002409 else if (PyUnicode_Check(delobj)) {
2410 PyErr_SetString(PyExc_TypeError,
2411 "deletions are implemented differently for unicode");
2412 return NULL;
2413 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002414#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002415 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2416 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002417 }
2418 else {
2419 del_table = NULL;
2420 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421 }
2422
2423 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002424 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425 result = PyString_FromStringAndSize((char *)NULL, inlen);
2426 if (result == NULL)
2427 return NULL;
2428 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002429 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430
2431 if (dellen == 0) {
2432 /* If no deletions are required, use faster code */
2433 for (i = inlen; --i >= 0; ) {
2434 c = Py_CHARMASK(*input++);
2435 if (Py_CHARMASK((*output++ = table[c])) != c)
2436 changed = 1;
2437 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002438 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002439 return result;
2440 Py_DECREF(result);
2441 Py_INCREF(input_obj);
2442 return input_obj;
2443 }
2444
2445 for (i = 0; i < 256; i++)
2446 trans_table[i] = Py_CHARMASK(table[i]);
2447
2448 for (i = 0; i < dellen; i++)
2449 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2450
2451 for (i = inlen; --i >= 0; ) {
2452 c = Py_CHARMASK(*input++);
2453 if (trans_table[c] != -1)
2454 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2455 continue;
2456 changed = 1;
2457 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002458 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002459 Py_DECREF(result);
2460 Py_INCREF(input_obj);
2461 return input_obj;
2462 }
2463 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002464 if (inlen > 0)
2465 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002466 return result;
2467}
2468
2469
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c in target[0:target_len]; evaluates
   to a char * to that byte, or NULL when there is none. */
#define findchar(target, target_len, c)				\
  ((char *)memchr((const void *)(target), (c), (target_len)))
2477
2478/* String ops must return a string. */
2479/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002480Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002481return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002483 if (PyString_CheckExact(self)) {
2484 Py_INCREF(self);
2485 return self;
2486 }
2487 return (PyStringObject *)PyString_FromStringAndSize(
2488 PyString_AS_STRING(self),
2489 PyString_GET_SIZE(self));
2490}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002491
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002492Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002493countchar(char *target, int target_len, char c)
2494{
2495 Py_ssize_t count=0;
2496 char *start=target;
2497 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002498
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002499 while ( (start=findchar(start, end-start, c)) != NULL ) {
2500 count++;
2501 start += 1;
2502 }
2503
2504 return count;
2505}
2506
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002507Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002508findstring(char *target, Py_ssize_t target_len,
2509 char *pattern, Py_ssize_t pattern_len,
2510 Py_ssize_t start,
2511 Py_ssize_t end,
2512 int direction)
2513{
2514 if (start < 0) {
2515 start += target_len;
2516 if (start < 0)
2517 start = 0;
2518 }
2519 if (end > target_len) {
2520 end = target_len;
2521 } else if (end < 0) {
2522 end += target_len;
2523 if (end < 0)
2524 end = 0;
2525 }
2526
2527 /* zero-length substrings always match at the first attempt */
2528 if (pattern_len == 0)
2529 return (direction > 0) ? start : end;
2530
2531 end -= pattern_len;
2532
2533 if (direction < 0) {
2534 for (; end >= start; end--)
2535 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2536 return end;
2537 } else {
2538 for (; start <= end; start++)
2539 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2540 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002541 }
2542 return -1;
2543}
2544
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002545Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002546countstring(char *target, Py_ssize_t target_len,
2547 char *pattern, Py_ssize_t pattern_len,
2548 Py_ssize_t start,
2549 Py_ssize_t end,
2550 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002551{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002552 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002553
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002554 if (start < 0) {
2555 start += target_len;
2556 if (start < 0)
2557 start = 0;
2558 }
2559 if (end > target_len) {
2560 end = target_len;
2561 } else if (end < 0) {
2562 end += target_len;
2563 if (end < 0)
2564 end = 0;
2565 }
2566
2567 /* zero-length substrings match everywhere */
2568 if (pattern_len == 0)
2569 return target_len+1;
2570
2571 end -= pattern_len;
2572
2573 if (direction < 0) {
2574 for (; end >= start; end--)
2575 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2576 count++;
2577 end -= pattern_len-1;
2578 }
2579 } else {
2580 for (; start <= end; start++)
2581 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2582 count++;
2583 start += pattern_len-1;
2584 }
2585 }
2586 return count;
2587}
2588
2589
/* Algorithms for different cases of string replacement */
2591
2592/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002593Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002594replace_interleave(PyStringObject *self,
2595 PyStringObject *to,
2596 Py_ssize_t maxcount)
2597{
2598 char *self_s, *to_s, *result_s;
2599 Py_ssize_t self_len, to_len, result_len;
2600 Py_ssize_t count, i, product;
2601 PyStringObject *result;
2602
2603 self_len = PyString_GET_SIZE(self);
2604 to_len = PyString_GET_SIZE(to);
2605
2606 /* 1 at the end plus 1 after every character */
2607 count = self_len+1;
2608 if (maxcount < count)
2609 count = maxcount;
2610
2611 /* Check for overflow */
2612 /* result_len = count * to_len + self_len; */
2613 product = count * to_len;
2614 if (product / to_len != count) {
2615 PyErr_SetString(PyExc_OverflowError,
2616 "replace string is too long");
2617 return NULL;
2618 }
2619 result_len = product + self_len;
2620 if (result_len < 0) {
2621 PyErr_SetString(PyExc_OverflowError,
2622 "replace string is too long");
2623 return NULL;
2624 }
2625
2626 if (! (result = (PyStringObject *)
2627 PyString_FromStringAndSize(NULL, result_len)) )
2628 return NULL;
2629
2630 self_s = PyString_AS_STRING(self);
2631 to_s = PyString_AS_STRING(to);
2632 to_len = PyString_GET_SIZE(to);
2633 result_s = PyString_AS_STRING(result);
2634
2635 /* TODO: special case single character, which doesn't need memcpy */
2636
2637 /* Lay the first one down (guaranteed this will occur) */
2638 memcpy(result_s, to_s, to_len);
2639 result_s += to_len;
2640 count -= 1;
2641
2642 for (i=0; i<count; i++) {
2643 *result_s++ = *self_s++;
2644 memcpy(result_s, to_s, to_len);
2645 result_s += to_len;
2646 }
2647
2648 /* Copy the rest of the original string */
2649 memcpy(result_s, self_s, self_len-i);
2650
2651 return result;
2652}
2653
2654/* Special case for deleting a single character */
2655/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002656Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002657replace_delete_single_character(PyStringObject *self,
2658 char from_c, Py_ssize_t maxcount)
2659{
2660 char *self_s, *result_s;
2661 char *start, *next, *end;
2662 Py_ssize_t self_len, result_len;
2663 Py_ssize_t count;
2664 PyStringObject *result;
2665
2666 self_len = PyString_GET_SIZE(self);
2667 self_s = PyString_AS_STRING(self);
2668
2669 count = countchar(self_s, self_len, from_c);
2670 if (count == 0) {
2671 return return_self(self);
2672 }
2673 if (count > maxcount)
2674 count = maxcount;
2675
2676 result_len = self_len - count; /* from_len == 1 */
2677 assert(result_len>=0);
2678
2679 if ( (result = (PyStringObject *)
2680 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2681 return NULL;
2682 result_s = PyString_AS_STRING(result);
2683
2684 start = self_s;
2685 end = self_s + self_len;
2686 while (count-- > 0) {
2687 next = findchar(start, end-start, from_c);
2688 if (next == NULL)
2689 break;
2690 memcpy(result_s, start, next-start);
2691 result_s += (next-start);
2692 start = next+1;
2693 }
2694 memcpy(result_s, start, end-start);
2695
2696 return result;
2697}
2698
2699/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2700
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002701Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002702replace_delete_substring(PyStringObject *self, PyStringObject *from,
2703 Py_ssize_t maxcount) {
2704 char *self_s, *from_s, *result_s;
2705 char *start, *next, *end;
2706 Py_ssize_t self_len, from_len, result_len;
2707 Py_ssize_t count, offset;
2708 PyStringObject *result;
2709
2710 self_len = PyString_GET_SIZE(self);
2711 self_s = PyString_AS_STRING(self);
2712 from_len = PyString_GET_SIZE(from);
2713 from_s = PyString_AS_STRING(from);
2714
2715 count = countstring(self_s, self_len,
2716 from_s, from_len,
2717 0, self_len, 1);
2718
2719 if (count > maxcount)
2720 count = maxcount;
2721
2722 if (count == 0) {
2723 /* no matches */
2724 return return_self(self);
2725 }
2726
2727 result_len = self_len - (count * from_len);
2728 assert (result_len>=0);
2729
2730 if ( (result = (PyStringObject *)
2731 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2732 return NULL;
2733
2734 result_s = PyString_AS_STRING(result);
2735
2736 start = self_s;
2737 end = self_s + self_len;
2738 while (count-- > 0) {
2739 offset = findstring(start, end-start,
2740 from_s, from_len,
2741 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002742 if (offset == -1)
2743 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744 next = start + offset;
2745
2746 memcpy(result_s, start, next-start);
2747
2748 result_s += (next-start);
2749 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002750 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002751 memcpy(result_s, start, end-start);
2752 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002753}
2754
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002755/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002756Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002757replace_single_character_in_place(PyStringObject *self,
2758 char from_c, char to_c,
2759 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002760{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002761 char *self_s, *result_s, *start, *end, *next;
2762 Py_ssize_t self_len;
2763 PyStringObject *result;
2764
2765 /* The result string will be the same size */
2766 self_s = PyString_AS_STRING(self);
2767 self_len = PyString_GET_SIZE(self);
2768
2769 next = findchar(self_s, self_len, from_c);
2770
2771 if (next == NULL) {
2772 /* No matches; return the original string */
2773 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002774 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002775
2776 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002777 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002778 if (result == NULL)
2779 return NULL;
2780 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002781 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002782
2783 /* change everything in-place, starting with this one */
2784 start = result_s + (next-self_s);
2785 *start = to_c;
2786 start++;
2787 end = result_s + self_len;
2788
2789 while (--maxcount > 0) {
2790 next = findchar(start, end-start, from_c);
2791 if (next == NULL)
2792 break;
2793 *next = to_c;
2794 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002795 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002796
2797 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002798}
2799
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002801Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002802replace_substring_in_place(PyStringObject *self,
2803 PyStringObject *from,
2804 PyStringObject *to,
2805 Py_ssize_t maxcount)
2806{
2807 char *result_s, *start, *end;
2808 char *self_s, *from_s, *to_s;
2809 Py_ssize_t self_len, from_len, offset;
2810 PyStringObject *result;
2811
2812 /* The result string will be the same size */
2813
2814 self_s = PyString_AS_STRING(self);
2815 self_len = PyString_GET_SIZE(self);
2816
2817 from_s = PyString_AS_STRING(from);
2818 from_len = PyString_GET_SIZE(from);
2819 to_s = PyString_AS_STRING(to);
2820
2821 offset = findstring(self_s, self_len,
2822 from_s, from_len,
2823 0, self_len, FORWARD);
2824
2825 if (offset == -1) {
2826 /* No matches; return the original string */
2827 return return_self(self);
2828 }
2829
2830 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002831 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002832 if (result == NULL)
2833 return NULL;
2834 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002835 memcpy(result_s, self_s, self_len);
2836
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002837
2838 /* change everything in-place, starting with this one */
2839 start = result_s + offset;
2840 memcpy(start, to_s, from_len);
2841 start += from_len;
2842 end = result_s + self_len;
2843
2844 while ( --maxcount > 0) {
2845 offset = findstring(start, end-start,
2846 from_s, from_len,
2847 0, end-start, FORWARD);
2848 if (offset==-1)
2849 break;
2850 memcpy(start+offset, to_s, from_len);
2851 start += offset+from_len;
2852 }
2853
2854 return result;
2855}
2856
2857/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002858Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002859replace_single_character(PyStringObject *self,
2860 char from_c,
2861 PyStringObject *to,
2862 Py_ssize_t maxcount)
2863{
2864 char *self_s, *to_s, *result_s;
2865 char *start, *next, *end;
2866 Py_ssize_t self_len, to_len, result_len;
2867 Py_ssize_t count, product;
2868 PyStringObject *result;
2869
2870 self_s = PyString_AS_STRING(self);
2871 self_len = PyString_GET_SIZE(self);
2872
2873 count = countchar(self_s, self_len, from_c);
2874 if (count > maxcount)
2875 count = maxcount;
2876
2877 if (count == 0) {
2878 /* no matches, return unchanged */
2879 return return_self(self);
2880 }
2881
2882 to_s = PyString_AS_STRING(to);
2883 to_len = PyString_GET_SIZE(to);
2884
2885 /* use the difference between current and new, hence the "-1" */
2886 /* result_len = self_len + count * (to_len-1) */
2887 product = count * (to_len-1);
2888 if (product / (to_len-1) != count) {
2889 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2890 return NULL;
2891 }
2892 result_len = self_len + product;
2893 if (result_len < 0) {
2894 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2895 return NULL;
2896 }
2897
2898 if ( (result = (PyStringObject *)
2899 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2900 return NULL;
2901 result_s = PyString_AS_STRING(result);
2902
2903 start = self_s;
2904 end = self_s + self_len;
2905 while (count-- > 0) {
2906 next = findchar(start, end-start, from_c);
2907 if (next == NULL)
2908 break;
2909
2910 if (next == start) {
2911 /* replace with the 'to' */
2912 memcpy(result_s, to_s, to_len);
2913 result_s += to_len;
2914 start += 1;
2915 } else {
2916 /* copy the unchanged old then the 'to' */
2917 memcpy(result_s, start, next-start);
2918 result_s += (next-start);
2919 memcpy(result_s, to_s, to_len);
2920 result_s += to_len;
2921 start = next+1;
2922 }
2923 }
2924 /* Copy the remainder of the remaining string */
2925 memcpy(result_s, start, end-start);
2926
2927 return result;
2928}
2929
2930/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002931Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002932replace_substring(PyStringObject *self,
2933 PyStringObject *from,
2934 PyStringObject *to,
2935 Py_ssize_t maxcount) {
2936 char *self_s, *from_s, *to_s, *result_s;
2937 char *start, *next, *end;
2938 Py_ssize_t self_len, from_len, to_len, result_len;
2939 Py_ssize_t count, offset, product;
2940 PyStringObject *result;
2941
2942 self_s = PyString_AS_STRING(self);
2943 self_len = PyString_GET_SIZE(self);
2944 from_s = PyString_AS_STRING(from);
2945 from_len = PyString_GET_SIZE(from);
2946
2947 count = countstring(self_s, self_len,
2948 from_s, from_len,
2949 0, self_len, FORWARD);
2950 if (count > maxcount)
2951 count = maxcount;
2952
2953 if (count == 0) {
2954 /* no matches, return unchanged */
2955 return return_self(self);
2956 }
2957
2958 to_s = PyString_AS_STRING(to);
2959 to_len = PyString_GET_SIZE(to);
2960
2961 /* Check for overflow */
2962 /* result_len = self_len + count * (to_len-from_len) */
2963 product = count * (to_len-from_len);
2964 if (product / (to_len-from_len) != count) {
2965 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2966 return NULL;
2967 }
2968 result_len = self_len + product;
2969 if (result_len < 0) {
2970 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2971 return NULL;
2972 }
2973
2974 if ( (result = (PyStringObject *)
2975 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2976 return NULL;
2977 result_s = PyString_AS_STRING(result);
2978
2979 start = self_s;
2980 end = self_s + self_len;
2981 while (count-- > 0) {
2982 offset = findstring(start, end-start,
2983 from_s, from_len,
2984 0, end-start, FORWARD);
2985 if (offset == -1)
2986 break;
2987 next = start+offset;
2988 if (next == start) {
2989 /* replace with the 'to' */
2990 memcpy(result_s, to_s, to_len);
2991 result_s += to_len;
2992 start += from_len;
2993 } else {
2994 /* copy the unchanged old then the 'to' */
2995 memcpy(result_s, start, next-start);
2996 result_s += (next-start);
2997 memcpy(result_s, to_s, to_len);
2998 result_s += to_len;
2999 start = next+from_len;
3000 }
3001 }
3002 /* Copy the remainder of the remaining string */
3003 memcpy(result_s, start, end-start);
3004
3005 return result;
3006}
3007
3008
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003009Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003010replace(PyStringObject *self,
3011 PyStringObject *from,
3012 PyStringObject *to,
3013 Py_ssize_t maxcount)
3014{
3015 Py_ssize_t from_len, to_len;
3016
3017 if (maxcount < 0) {
3018 maxcount = PY_SSIZE_T_MAX;
3019 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3020 /* nothing to do; return the original string */
3021 return return_self(self);
3022 }
3023
3024 from_len = PyString_GET_SIZE(from);
3025 to_len = PyString_GET_SIZE(to);
3026
3027 if (maxcount == 0 ||
3028 (from_len == 0 && to_len == 0)) {
3029 /* nothing to do; return the original string */
3030 return return_self(self);
3031 }
3032
3033 /* Handle zero-length special cases */
3034
3035 if (from_len == 0) {
3036 /* insert the 'to' string everywhere. */
3037 /* >>> "Python".replace("", ".") */
3038 /* '.P.y.t.h.o.n.' */
3039 return replace_interleave(self, to, maxcount);
3040 }
3041
3042 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3043 /* point for an empty self string to generate a non-empty string */
3044 /* Special case so the remaining code always gets a non-empty string */
3045 if (PyString_GET_SIZE(self) == 0) {
3046 return return_self(self);
3047 }
3048
3049 if (to_len == 0) {
3050 /* delete all occurances of 'from' string */
3051 if (from_len == 1) {
3052 return replace_delete_single_character(
3053 self, PyString_AS_STRING(from)[0], maxcount);
3054 } else {
3055 return replace_delete_substring(self, from, maxcount);
3056 }
3057 }
3058
3059 /* Handle special case where both strings have the same length */
3060
3061 if (from_len == to_len) {
3062 if (from_len == 1) {
3063 return replace_single_character_in_place(
3064 self,
3065 PyString_AS_STRING(from)[0],
3066 PyString_AS_STRING(to)[0],
3067 maxcount);
3068 } else {
3069 return replace_substring_in_place(
3070 self, from, to, maxcount);
3071 }
3072 }
3073
3074 /* Otherwise use the more generic algorithms */
3075 if (from_len == 1) {
3076 return replace_single_character(self, PyString_AS_STRING(from)[0],
3077 to, maxcount);
3078 } else {
3079 /* len('from')>=2, len('to')>=1 */
3080 return replace_substring(self, from, to, maxcount);
3081 }
3082}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003083
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003084PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003085"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003086\n\
3087Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003088old replaced by new. If the optional argument count is\n\
3089given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003090
3091static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003092string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003093{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003094 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003095 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003096 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003097 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003098
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003099 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003100 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003101
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003102 if (PyString_Check(from)) {
3103 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003104 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003105#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003106 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003107 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003108 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003109#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003110 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003111 return NULL;
3112
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003113 if (PyString_Check(to)) {
3114 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003115 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003116#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003117 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003118 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003119 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003120#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003121 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003122 return NULL;
3123
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003124 return (PyObject *)replace((PyStringObject *) self,
3125 (PyStringObject *) from,
3126 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003127}
3128
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003129/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003130
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003131PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003132"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003133\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003134Return True if S starts with the specified prefix, False otherwise.\n\
3135With optional start, test S beginning at that position.\n\
3136With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137
3138static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003139string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003140{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003141 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003142 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003144 Py_ssize_t plen;
3145 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003146 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003147 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003148
Guido van Rossumc6821402000-05-08 14:08:05 +00003149 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3150 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003151 return NULL;
3152 if (PyString_Check(subobj)) {
3153 prefix = PyString_AS_STRING(subobj);
3154 plen = PyString_GET_SIZE(subobj);
3155 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003156#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003157 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003158 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003159 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003160 subobj, start, end, -1);
3161 if (rc == -1)
3162 return NULL;
3163 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003164 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003165 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003166#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003168 return NULL;
3169
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003170 string_adjust_indices(&start, &end, len);
3171
3172 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003173 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003174
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003175 if (end-start >= plen)
3176 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3177 else
3178 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003179}
3180
3181
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003182PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003183"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003184\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003185Return True if S ends with the specified suffix, False otherwise.\n\
3186With optional start, test S beginning at that position.\n\
3187With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003188
3189static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003190string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003191{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003192 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003193 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003194 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003195 Py_ssize_t slen;
3196 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003197 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003198 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003199
Guido van Rossumc6821402000-05-08 14:08:05 +00003200 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3201 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003202 return NULL;
3203 if (PyString_Check(subobj)) {
3204 suffix = PyString_AS_STRING(subobj);
3205 slen = PyString_GET_SIZE(subobj);
3206 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003207#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003208 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003209 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003210 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003211 subobj, start, end, +1);
3212 if (rc == -1)
3213 return NULL;
3214 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003215 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003216 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003217#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003218 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003219 return NULL;
3220
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003221 string_adjust_indices(&start, &end, len);
3222
3223 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003224 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003225
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003226 if (end-slen > start)
3227 start = end - slen;
3228 if (end-start >= slen)
3229 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3230 else
3231 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003232}
3233
3234
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003235PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003236"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003237\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003238Encodes S using the codec registered for encoding. encoding defaults\n\
3239to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003240handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003241a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3242'xmlcharrefreplace' as well as any other name registered with\n\
3243codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003244
3245static PyObject *
3246string_encode(PyStringObject *self, PyObject *args)
3247{
3248 char *encoding = NULL;
3249 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003250 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003251
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003252 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3253 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003254 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003255 if (v == NULL)
3256 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003257 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3258 PyErr_Format(PyExc_TypeError,
3259 "encoder did not return a string/unicode object "
3260 "(type=%.400s)",
3261 v->ob_type->tp_name);
3262 Py_DECREF(v);
3263 return NULL;
3264 }
3265 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003266
3267 onError:
3268 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003269}
3270
3271
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003272PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003273"S.decode([encoding[,errors]]) -> object\n\
3274\n\
3275Decodes S using the codec registered for encoding. encoding defaults\n\
3276to the default encoding. errors may be given to set a different error\n\
3277handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003278a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3279as well as any other name registerd with codecs.register_error that is\n\
3280able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003281
3282static PyObject *
3283string_decode(PyStringObject *self, PyObject *args)
3284{
3285 char *encoding = NULL;
3286 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003287 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003288
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003289 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3290 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003291 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003292 if (v == NULL)
3293 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003294 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3295 PyErr_Format(PyExc_TypeError,
3296 "decoder did not return a string/unicode object "
3297 "(type=%.400s)",
3298 v->ob_type->tp_name);
3299 Py_DECREF(v);
3300 return NULL;
3301 }
3302 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003303
3304 onError:
3305 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003306}
3307
3308
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003309PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003310"S.expandtabs([tabsize]) -> string\n\
3311\n\
3312Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003313If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003314
3315static PyObject*
3316string_expandtabs(PyStringObject *self, PyObject *args)
3317{
3318 const char *e, *p;
3319 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003320 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003321 PyObject *u;
3322 int tabsize = 8;
3323
3324 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3325 return NULL;
3326
Thomas Wouters7e474022000-07-16 12:04:32 +00003327 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003328 i = j = 0;
3329 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3330 for (p = PyString_AS_STRING(self); p < e; p++)
3331 if (*p == '\t') {
3332 if (tabsize > 0)
3333 j += tabsize - (j % tabsize);
3334 }
3335 else {
3336 j++;
3337 if (*p == '\n' || *p == '\r') {
3338 i += j;
3339 j = 0;
3340 }
3341 }
3342
3343 /* Second pass: create output string and fill it */
3344 u = PyString_FromStringAndSize(NULL, i + j);
3345 if (!u)
3346 return NULL;
3347
3348 j = 0;
3349 q = PyString_AS_STRING(u);
3350
3351 for (p = PyString_AS_STRING(self); p < e; p++)
3352 if (*p == '\t') {
3353 if (tabsize > 0) {
3354 i = tabsize - (j % tabsize);
3355 j += i;
3356 while (i--)
3357 *q++ = ' ';
3358 }
3359 }
3360 else {
3361 j++;
3362 *q++ = *p;
3363 if (*p == '\n' || *p == '\r')
3364 j = 0;
3365 }
3366
3367 return u;
3368}
3369
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003370Py_LOCAL(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003371pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003372{
3373 PyObject *u;
3374
3375 if (left < 0)
3376 left = 0;
3377 if (right < 0)
3378 right = 0;
3379
Tim Peters8fa5dd02001-09-12 02:18:30 +00003380 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003381 Py_INCREF(self);
3382 return (PyObject *)self;
3383 }
3384
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003385 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003386 left + PyString_GET_SIZE(self) + right);
3387 if (u) {
3388 if (left)
3389 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003390 memcpy(PyString_AS_STRING(u) + left,
3391 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003392 PyString_GET_SIZE(self));
3393 if (right)
3394 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3395 fill, right);
3396 }
3397
3398 return u;
3399}
3400
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003401PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003402"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003403"\n"
3404"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003405"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406
3407static PyObject *
3408string_ljust(PyStringObject *self, PyObject *args)
3409{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003410 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003411 char fillchar = ' ';
3412
Thomas Wouters4abb3662006-04-19 14:50:15 +00003413 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414 return NULL;
3415
Tim Peters8fa5dd02001-09-12 02:18:30 +00003416 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003417 Py_INCREF(self);
3418 return (PyObject*) self;
3419 }
3420
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003421 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422}
3423
3424
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003425PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003426"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003427"\n"
3428"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003429"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430
3431static PyObject *
3432string_rjust(PyStringObject *self, PyObject *args)
3433{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003434 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003435 char fillchar = ' ';
3436
Thomas Wouters4abb3662006-04-19 14:50:15 +00003437 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003438 return NULL;
3439
Tim Peters8fa5dd02001-09-12 02:18:30 +00003440 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003441 Py_INCREF(self);
3442 return (PyObject*) self;
3443 }
3444
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003445 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003446}
3447
3448
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003449PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003450"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003451"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003452"Return S centered in a string of length width. Padding is\n"
3453"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003454
3455static PyObject *
3456string_center(PyStringObject *self, PyObject *args)
3457{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003458 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003459 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003460 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003461
Thomas Wouters4abb3662006-04-19 14:50:15 +00003462 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003463 return NULL;
3464
Tim Peters8fa5dd02001-09-12 02:18:30 +00003465 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003466 Py_INCREF(self);
3467 return (PyObject*) self;
3468 }
3469
3470 marg = width - PyString_GET_SIZE(self);
3471 left = marg / 2 + (marg & width & 1);
3472
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003473 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003474}
3475
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003476PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003477"S.zfill(width) -> string\n"
3478"\n"
3479"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003480"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003481
3482static PyObject *
3483string_zfill(PyStringObject *self, PyObject *args)
3484{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003485 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003486 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003487 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003488 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003489
Thomas Wouters4abb3662006-04-19 14:50:15 +00003490 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003491 return NULL;
3492
3493 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003494 if (PyString_CheckExact(self)) {
3495 Py_INCREF(self);
3496 return (PyObject*) self;
3497 }
3498 else
3499 return PyString_FromStringAndSize(
3500 PyString_AS_STRING(self),
3501 PyString_GET_SIZE(self)
3502 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003503 }
3504
3505 fill = width - PyString_GET_SIZE(self);
3506
3507 s = pad(self, fill, 0, '0');
3508
3509 if (s == NULL)
3510 return NULL;
3511
3512 p = PyString_AS_STRING(s);
3513 if (p[fill] == '+' || p[fill] == '-') {
3514 /* move sign to beginning of string */
3515 p[0] = p[fill];
3516 p[fill] = '0';
3517 }
3518
3519 return (PyObject*) s;
3520}
3521
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003522PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003523"S.isspace() -> bool\n\
3524\n\
3525Return True if all characters in S are whitespace\n\
3526and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003527
3528static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003529string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003530{
Fred Drakeba096332000-07-09 07:04:36 +00003531 register const unsigned char *p
3532 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003533 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534
Guido van Rossum4c08d552000-03-10 22:55:18 +00003535 /* Shortcut for single character strings */
3536 if (PyString_GET_SIZE(self) == 1 &&
3537 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003538 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003539
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003540 /* Special case for empty strings */
3541 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003542 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003543
Guido van Rossum4c08d552000-03-10 22:55:18 +00003544 e = p + PyString_GET_SIZE(self);
3545 for (; p < e; p++) {
3546 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003547 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003548 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003549 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003550}
3551
3552
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003553PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003554"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003555\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003556Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003557and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003558
3559static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003560string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003561{
Fred Drakeba096332000-07-09 07:04:36 +00003562 register const unsigned char *p
3563 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003564 register const unsigned char *e;
3565
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003566 /* Shortcut for single character strings */
3567 if (PyString_GET_SIZE(self) == 1 &&
3568 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003569 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003570
3571 /* Special case for empty strings */
3572 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003573 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003574
3575 e = p + PyString_GET_SIZE(self);
3576 for (; p < e; p++) {
3577 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003578 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003579 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581}
3582
3583
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003584PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003585"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003586\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003587Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003588and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003589
3590static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003591string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003592{
Fred Drakeba096332000-07-09 07:04:36 +00003593 register const unsigned char *p
3594 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003595 register const unsigned char *e;
3596
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003597 /* Shortcut for single character strings */
3598 if (PyString_GET_SIZE(self) == 1 &&
3599 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003600 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003601
3602 /* Special case for empty strings */
3603 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003604 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605
3606 e = p + PyString_GET_SIZE(self);
3607 for (; p < e; p++) {
3608 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003609 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003610 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003612}
3613
3614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003615PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003616"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003618Return True if all characters in S are digits\n\
3619and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003620
3621static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003622string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003623{
Fred Drakeba096332000-07-09 07:04:36 +00003624 register const unsigned char *p
3625 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003626 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627
Guido van Rossum4c08d552000-03-10 22:55:18 +00003628 /* Shortcut for single character strings */
3629 if (PyString_GET_SIZE(self) == 1 &&
3630 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003631 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003633 /* Special case for empty strings */
3634 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003635 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003636
Guido van Rossum4c08d552000-03-10 22:55:18 +00003637 e = p + PyString_GET_SIZE(self);
3638 for (; p < e; p++) {
3639 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003640 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003642 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643}
3644
3645
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003646PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003647"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003648\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003649Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003650at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651
3652static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003653string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654{
Fred Drakeba096332000-07-09 07:04:36 +00003655 register const unsigned char *p
3656 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003657 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658 int cased;
3659
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660 /* Shortcut for single character strings */
3661 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003662 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003664 /* Special case for empty strings */
3665 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003666 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003667
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 e = p + PyString_GET_SIZE(self);
3669 cased = 0;
3670 for (; p < e; p++) {
3671 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003673 else if (!cased && islower(*p))
3674 cased = 1;
3675 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677}
3678
3679
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003680PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003681"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003683Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003684at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003685
3686static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003687string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003688{
Fred Drakeba096332000-07-09 07:04:36 +00003689 register const unsigned char *p
3690 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003691 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692 int cased;
3693
Guido van Rossum4c08d552000-03-10 22:55:18 +00003694 /* Shortcut for single character strings */
3695 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003696 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003698 /* Special case for empty strings */
3699 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003700 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003701
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702 e = p + PyString_GET_SIZE(self);
3703 cased = 0;
3704 for (; p < e; p++) {
3705 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003706 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707 else if (!cased && isupper(*p))
3708 cased = 1;
3709 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003710 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711}
3712
3713
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003714PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003715"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003717Return True if S is a titlecased string and there is at least one\n\
3718character in S, i.e. uppercase characters may only follow uncased\n\
3719characters and lowercase characters only cased ones. Return False\n\
3720otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721
3722static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003723string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724{
Fred Drakeba096332000-07-09 07:04:36 +00003725 register const unsigned char *p
3726 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003727 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728 int cased, previous_is_cased;
3729
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730 /* Shortcut for single character strings */
3731 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003732 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003734 /* Special case for empty strings */
3735 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003736 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003737
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738 e = p + PyString_GET_SIZE(self);
3739 cased = 0;
3740 previous_is_cased = 0;
3741 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003742 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743
3744 if (isupper(ch)) {
3745 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003746 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747 previous_is_cased = 1;
3748 cased = 1;
3749 }
3750 else if (islower(ch)) {
3751 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003752 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753 previous_is_cased = 1;
3754 cased = 1;
3755 }
3756 else
3757 previous_is_cased = 0;
3758 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003759 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760}
3761
3762
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003763PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003764"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765\n\
3766Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003767Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003768is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770static PyObject*
3771string_splitlines(PyStringObject *self, PyObject *args)
3772{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003773 register Py_ssize_t i;
3774 register Py_ssize_t j;
3775 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003776 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777 PyObject *list;
3778 PyObject *str;
3779 char *data;
3780
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003781 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782 return NULL;
3783
3784 data = PyString_AS_STRING(self);
3785 len = PyString_GET_SIZE(self);
3786
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787 list = PyList_New(0);
3788 if (!list)
3789 goto onError;
3790
3791 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003792 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003793
Guido van Rossum4c08d552000-03-10 22:55:18 +00003794 /* Find a line and append it */
3795 while (i < len && data[i] != '\n' && data[i] != '\r')
3796 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797
3798 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003799 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800 if (i < len) {
3801 if (data[i] == '\r' && i + 1 < len &&
3802 data[i+1] == '\n')
3803 i += 2;
3804 else
3805 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003806 if (keepends)
3807 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003809 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810 j = i;
3811 }
3812 if (j < len) {
3813 SPLIT_APPEND(data, j, len);
3814 }
3815
3816 return list;
3817
3818 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003819 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003820 return NULL;
3821}
3822
3823#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003824#undef SPLIT_ADD
3825#undef MAX_PREALLOC
3826#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003827
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003828static PyObject *
3829string_getnewargs(PyStringObject *v)
3830{
3831 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3832}
3833
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003834
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003835static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003836string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003837 /* Counterparts of the obsolete stropmodule functions; except
3838 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003839 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3840 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003841 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003842 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3843 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003844 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3845 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3846 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3847 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3848 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3849 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3850 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003851 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3852 capitalize__doc__},
3853 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3854 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3855 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003856 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003857 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3858 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3859 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3860 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3861 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3862 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3863 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003864 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3865 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003866 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3867 startswith__doc__},
3868 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3869 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3870 swapcase__doc__},
3871 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3872 translate__doc__},
3873 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3874 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3875 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3876 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3877 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3878 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3879 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3880 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3881 expandtabs__doc__},
3882 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3883 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003884 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003885 {NULL, NULL} /* sentinel */
3886};
3887
Jeremy Hylton938ace62002-07-17 16:30:39 +00003888static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003889str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3890
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003891static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003892string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003893{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003894 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003895 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003896
Guido van Rossumae960af2001-08-30 03:11:59 +00003897 if (type != &PyString_Type)
3898 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003899 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3900 return NULL;
3901 if (x == NULL)
3902 return PyString_FromString("");
3903 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003904}
3905
Guido van Rossumae960af2001-08-30 03:11:59 +00003906static PyObject *
3907str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3908{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003909 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003910 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003911
3912 assert(PyType_IsSubtype(type, &PyString_Type));
3913 tmp = string_new(&PyString_Type, args, kwds);
3914 if (tmp == NULL)
3915 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003916 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003917 n = PyString_GET_SIZE(tmp);
3918 pnew = type->tp_alloc(type, n);
3919 if (pnew != NULL) {
3920 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003921 ((PyStringObject *)pnew)->ob_shash =
3922 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003923 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003924 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003925 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003926 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003927}
3928
Guido van Rossumcacfc072002-05-24 19:01:59 +00003929static PyObject *
3930basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3931{
3932 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003933 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003934 return NULL;
3935}
3936
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003937static PyObject *
3938string_mod(PyObject *v, PyObject *w)
3939{
3940 if (!PyString_Check(v)) {
3941 Py_INCREF(Py_NotImplemented);
3942 return Py_NotImplemented;
3943 }
3944 return PyString_Format(v, w);
3945}
3946
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003947PyDoc_STRVAR(basestring_doc,
3948"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003949
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003950static PyNumberMethods string_as_number = {
3951 0, /*nb_add*/
3952 0, /*nb_subtract*/
3953 0, /*nb_multiply*/
3954 0, /*nb_divide*/
3955 string_mod, /*nb_remainder*/
3956};
3957
3958
Guido van Rossumcacfc072002-05-24 19:01:59 +00003959PyTypeObject PyBaseString_Type = {
3960 PyObject_HEAD_INIT(&PyType_Type)
3961 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003962 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003963 0,
3964 0,
3965 0, /* tp_dealloc */
3966 0, /* tp_print */
3967 0, /* tp_getattr */
3968 0, /* tp_setattr */
3969 0, /* tp_compare */
3970 0, /* tp_repr */
3971 0, /* tp_as_number */
3972 0, /* tp_as_sequence */
3973 0, /* tp_as_mapping */
3974 0, /* tp_hash */
3975 0, /* tp_call */
3976 0, /* tp_str */
3977 0, /* tp_getattro */
3978 0, /* tp_setattro */
3979 0, /* tp_as_buffer */
3980 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3981 basestring_doc, /* tp_doc */
3982 0, /* tp_traverse */
3983 0, /* tp_clear */
3984 0, /* tp_richcompare */
3985 0, /* tp_weaklistoffset */
3986 0, /* tp_iter */
3987 0, /* tp_iternext */
3988 0, /* tp_methods */
3989 0, /* tp_members */
3990 0, /* tp_getset */
3991 &PyBaseObject_Type, /* tp_base */
3992 0, /* tp_dict */
3993 0, /* tp_descr_get */
3994 0, /* tp_descr_set */
3995 0, /* tp_dictoffset */
3996 0, /* tp_init */
3997 0, /* tp_alloc */
3998 basestring_new, /* tp_new */
3999 0, /* tp_free */
4000};
4001
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004002PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004003"str(object) -> string\n\
4004\n\
4005Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004006If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004008PyTypeObject PyString_Type = {
4009 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004010 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004011 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004012 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004013 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004014 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004015 (printfunc)string_print, /* tp_print */
4016 0, /* tp_getattr */
4017 0, /* tp_setattr */
4018 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004019 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004020 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004021 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004022 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004023 (hashfunc)string_hash, /* tp_hash */
4024 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004025 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004026 PyObject_GenericGetAttr, /* tp_getattro */
4027 0, /* tp_setattro */
4028 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004029 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004030 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004031 string_doc, /* tp_doc */
4032 0, /* tp_traverse */
4033 0, /* tp_clear */
4034 (richcmpfunc)string_richcompare, /* tp_richcompare */
4035 0, /* tp_weaklistoffset */
4036 0, /* tp_iter */
4037 0, /* tp_iternext */
4038 string_methods, /* tp_methods */
4039 0, /* tp_members */
4040 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004041 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004042 0, /* tp_dict */
4043 0, /* tp_descr_get */
4044 0, /* tp_descr_set */
4045 0, /* tp_dictoffset */
4046 0, /* tp_init */
4047 0, /* tp_alloc */
4048 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004049 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004050};
4051
4052void
Fred Drakeba096332000-07-09 07:04:36 +00004053PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004054{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004055 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004056 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004057 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004058 if (w == NULL || !PyString_Check(*pv)) {
4059 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004060 *pv = NULL;
4061 return;
4062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004063 v = string_concat((PyStringObject *) *pv, w);
4064 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004065 *pv = v;
4066}
4067
Guido van Rossum013142a1994-08-30 08:19:36 +00004068void
Fred Drakeba096332000-07-09 07:04:36 +00004069PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004070{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004071 PyString_Concat(pv, w);
4072 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004073}
4074
4075
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004076/* The following function breaks the notion that strings are immutable:
4077 it changes the size of a string. We get away with this only if there
4078 is only one module referencing the object. You can also think of it
4079 as creating a new string object and destroying the old one, only
4080 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004081 already be known to some other part of the code...
4082 Note that if there's not enough memory to resize the string, the original
4083 string object at *pv is deallocated, *pv is set to NULL, an "out of
4084 memory" exception is set, and -1 is returned. Else (on success) 0 is
4085 returned, and the value in *pv may or may not be the same as on input.
4086 As always, an extra byte is allocated for a trailing \0 byte (newsize
4087 does *not* include that), and a trailing \0 byte is stored.
4088*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004089
4090int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004091_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004092{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004093 register PyObject *v;
4094 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004095 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004096 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4097 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004098 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004099 Py_DECREF(v);
4100 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004101 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004102 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004103 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004104 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004105 _Py_ForgetReference(v);
4106 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004107 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004108 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004109 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004110 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004111 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004112 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004113 _Py_NewReference(*pv);
4114 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004115 sv->ob_size = newsize;
4116 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004117 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004118 return 0;
4119}
Guido van Rossume5372401993-03-16 12:15:04 +00004120
4121/* Helpers for formatstring */
4122
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004123Py_LOCAL(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004124getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004125{
Thomas Wouters977485d2006-02-16 15:59:12 +00004126 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004127 if (argidx < arglen) {
4128 (*p_argidx)++;
4129 if (arglen < 0)
4130 return args;
4131 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004132 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004133 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004134 PyErr_SetString(PyExc_TypeError,
4135 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004136 return NULL;
4137}
4138
/* Format flag bits, one per flag character of a format specifier. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4151
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004152Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004153formatfloat(char *buf, size_t buflen, int flags,
4154 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004155{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004156 /* fmt = '%#.' + `prec` + `type`
4157 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004158 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004159 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004160 x = PyFloat_AsDouble(v);
4161 if (x == -1.0 && PyErr_Occurred()) {
4162 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004163 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004164 }
Guido van Rossume5372401993-03-16 12:15:04 +00004165 if (prec < 0)
4166 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004167 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4168 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004169 /* Worst case length calc to ensure no buffer overrun:
4170
4171 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004172 fmt = %#.<prec>g
4173 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004174 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004175 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004176
4177 'f' formats:
4178 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4179 len = 1 + 50 + 1 + prec = 52 + prec
4180
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004181 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004182 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004183
4184 */
4185 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4186 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004187 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004188 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004189 return -1;
4190 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004191 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4192 (flags&F_ALT) ? "#" : "",
4193 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004194 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004195 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004196}
4197
Tim Peters38fd5b62000-09-21 05:43:11 +00004198/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4199 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4200 * Python's regular ints.
4201 * Return value: a new PyString*, or NULL if error.
4202 * . *pbuf is set to point into it,
4203 * *plen set to the # of chars following that.
4204 * Caller must decref it when done using pbuf.
4205 * The string starting at *pbuf is of the form
4206 * "-"? ("0x" | "0X")? digit+
4207 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004208 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004209 * There will be at least prec digits, zero-filled on the left if
4210 * necessary to get that many.
4211 * val object to be converted
4212 * flags bitmask of format flags; only F_ALT is looked at
4213 * prec minimum number of digits; 0-fill on left if needed
4214 * type a character in [duoxX]; u acts the same as d
4215 *
4216 * CAUTION: o, x and X conversions on regular ints can never
4217 * produce a '-' sign, but can for Python's unbounded ints.
4218 */
4219PyObject*
4220_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4221 char **pbuf, int *plen)
4222{
4223 PyObject *result = NULL;
4224 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004225 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004226 int sign; /* 1 if '-', else 0 */
4227 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004228 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004229 int numdigits; /* len == numnondigits + numdigits */
4230 int numnondigits = 0;
4231
4232 switch (type) {
4233 case 'd':
4234 case 'u':
4235 result = val->ob_type->tp_str(val);
4236 break;
4237 case 'o':
4238 result = val->ob_type->tp_as_number->nb_oct(val);
4239 break;
4240 case 'x':
4241 case 'X':
4242 numnondigits = 2;
4243 result = val->ob_type->tp_as_number->nb_hex(val);
4244 break;
4245 default:
4246 assert(!"'type' not in [duoxX]");
4247 }
4248 if (!result)
4249 return NULL;
4250
4251 /* To modify the string in-place, there can only be one reference. */
4252 if (result->ob_refcnt != 1) {
4253 PyErr_BadInternalCall();
4254 return NULL;
4255 }
4256 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004257 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004258 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004259 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4260 return NULL;
4261 }
4262 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004263 if (buf[len-1] == 'L') {
4264 --len;
4265 buf[len] = '\0';
4266 }
4267 sign = buf[0] == '-';
4268 numnondigits += sign;
4269 numdigits = len - numnondigits;
4270 assert(numdigits > 0);
4271
Tim Petersfff53252001-04-12 18:38:48 +00004272 /* Get rid of base marker unless F_ALT */
4273 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004274 /* Need to skip 0x, 0X or 0. */
4275 int skipped = 0;
4276 switch (type) {
4277 case 'o':
4278 assert(buf[sign] == '0');
4279 /* If 0 is only digit, leave it alone. */
4280 if (numdigits > 1) {
4281 skipped = 1;
4282 --numdigits;
4283 }
4284 break;
4285 case 'x':
4286 case 'X':
4287 assert(buf[sign] == '0');
4288 assert(buf[sign + 1] == 'x');
4289 skipped = 2;
4290 numnondigits -= 2;
4291 break;
4292 }
4293 if (skipped) {
4294 buf += skipped;
4295 len -= skipped;
4296 if (sign)
4297 buf[0] = '-';
4298 }
4299 assert(len == numnondigits + numdigits);
4300 assert(numdigits > 0);
4301 }
4302
4303 /* Fill with leading zeroes to meet minimum width. */
4304 if (prec > numdigits) {
4305 PyObject *r1 = PyString_FromStringAndSize(NULL,
4306 numnondigits + prec);
4307 char *b1;
4308 if (!r1) {
4309 Py_DECREF(result);
4310 return NULL;
4311 }
4312 b1 = PyString_AS_STRING(r1);
4313 for (i = 0; i < numnondigits; ++i)
4314 *b1++ = *buf++;
4315 for (i = 0; i < prec - numdigits; i++)
4316 *b1++ = '0';
4317 for (i = 0; i < numdigits; i++)
4318 *b1++ = *buf++;
4319 *b1 = '\0';
4320 Py_DECREF(result);
4321 result = r1;
4322 buf = PyString_AS_STRING(result);
4323 len = numnondigits + prec;
4324 }
4325
4326 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004327 if (type == 'X') {
4328 /* Need to convert all lower case letters to upper case.
4329 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004330 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004331 if (buf[i] >= 'a' && buf[i] <= 'x')
4332 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004333 }
4334 *pbuf = buf;
4335 *plen = len;
4336 return result;
4337}
4338
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004339Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004340formatint(char *buf, size_t buflen, int flags,
4341 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004342{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004343 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004344 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4345 + 1 + 1 = 24 */
4346 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004347 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004348 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004349
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004350 x = PyInt_AsLong(v);
4351 if (x == -1 && PyErr_Occurred()) {
4352 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004353 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004354 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004355 if (x < 0 && type == 'u') {
4356 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004357 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004358 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4359 sign = "-";
4360 else
4361 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004362 if (prec < 0)
4363 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004364
4365 if ((flags & F_ALT) &&
4366 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004367 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004368 * of issues that cause pain:
4369 * - when 0 is being converted, the C standard leaves off
4370 * the '0x' or '0X', which is inconsistent with other
4371 * %#x/%#X conversions and inconsistent with Python's
4372 * hex() function
4373 * - there are platforms that violate the standard and
4374 * convert 0 with the '0x' or '0X'
4375 * (Metrowerks, Compaq Tru64)
4376 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004377 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004378 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004379 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004380 * We can achieve the desired consistency by inserting our
4381 * own '0x' or '0X' prefix, and substituting %x/%X in place
4382 * of %#x/%#X.
4383 *
4384 * Note that this is the same approach as used in
4385 * formatint() in unicodeobject.c
4386 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004387 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4388 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004389 }
4390 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004391 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4392 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004393 prec, type);
4394 }
4395
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004396 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4397 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004398 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004399 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004400 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004401 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004402 return -1;
4403 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004404 if (sign[0])
4405 PyOS_snprintf(buf, buflen, fmt, -x);
4406 else
4407 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004408 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004409}
4410
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004411Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004412formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004413{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004414 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004415 if (PyString_Check(v)) {
4416 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004417 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004418 }
4419 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004420 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004421 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004422 }
4423 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004424 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004425}
4426
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004427/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4428
4429 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4430 chars are formatted. XXX This is a magic number. Each formatting
4431 routine does bounds checking to ensure no overflow, but a better
4432 solution may be to malloc a buffer of appropriate size for each
4433 format. For now, the current solution is sufficient.
4434*/
/* Fully parenthesized so the cast cannot bind to neighbouring tokens
   wherever the macro is expanded. */
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004436
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004437PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004438PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004439{
4440 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004441 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004442 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004443 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004444 PyObject *result, *orig_args;
4445#ifdef Py_USING_UNICODE
4446 PyObject *v, *w;
4447#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004448 PyObject *dict = NULL;
4449 if (format == NULL || !PyString_Check(format) || args == NULL) {
4450 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004451 return NULL;
4452 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004453 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004454 fmt = PyString_AS_STRING(format);
4455 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004456 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004457 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004458 if (result == NULL)
4459 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004460 res = PyString_AsString(result);
4461 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004462 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004463 argidx = 0;
4464 }
4465 else {
4466 arglen = -1;
4467 argidx = -2;
4468 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004469 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4470 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004471 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004472 while (--fmtcnt >= 0) {
4473 if (*fmt != '%') {
4474 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004475 rescnt = fmtcnt + 100;
4476 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004477 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004478 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004479 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004480 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004481 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004482 }
4483 *res++ = *fmt++;
4484 }
4485 else {
4486 /* Got a format specifier */
4487 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004488 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004489 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004490 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004491 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004492 PyObject *v = NULL;
4493 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004494 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004495 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004496 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004497 char formatbuf[FORMATBUFLEN];
4498 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004499#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004500 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004501 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004502#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004503
Guido van Rossumda9c2711996-12-05 21:58:58 +00004504 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004505 if (*fmt == '(') {
4506 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004507 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004508 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004509 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004510
4511 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004512 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004513 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004514 goto error;
4515 }
4516 ++fmt;
4517 --fmtcnt;
4518 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004519 /* Skip over balanced parentheses */
4520 while (pcount > 0 && --fmtcnt >= 0) {
4521 if (*fmt == ')')
4522 --pcount;
4523 else if (*fmt == '(')
4524 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004525 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004526 }
4527 keylen = fmt - keystart - 1;
4528 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004529 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004530 "incomplete format key");
4531 goto error;
4532 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004533 key = PyString_FromStringAndSize(keystart,
4534 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004535 if (key == NULL)
4536 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004537 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004538 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004539 args_owned = 0;
4540 }
4541 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004542 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004543 if (args == NULL) {
4544 goto error;
4545 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004546 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004547 arglen = -1;
4548 argidx = -2;
4549 }
Guido van Rossume5372401993-03-16 12:15:04 +00004550 while (--fmtcnt >= 0) {
4551 switch (c = *fmt++) {
4552 case '-': flags |= F_LJUST; continue;
4553 case '+': flags |= F_SIGN; continue;
4554 case ' ': flags |= F_BLANK; continue;
4555 case '#': flags |= F_ALT; continue;
4556 case '0': flags |= F_ZERO; continue;
4557 }
4558 break;
4559 }
4560 if (c == '*') {
4561 v = getnextarg(args, arglen, &argidx);
4562 if (v == NULL)
4563 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004564 if (!PyInt_Check(v)) {
4565 PyErr_SetString(PyExc_TypeError,
4566 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004567 goto error;
4568 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004569 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004570 if (width < 0) {
4571 flags |= F_LJUST;
4572 width = -width;
4573 }
Guido van Rossume5372401993-03-16 12:15:04 +00004574 if (--fmtcnt >= 0)
4575 c = *fmt++;
4576 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004577 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004578 width = c - '0';
4579 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004580 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004581 if (!isdigit(c))
4582 break;
4583 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004584 PyErr_SetString(
4585 PyExc_ValueError,
4586 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004587 goto error;
4588 }
4589 width = width*10 + (c - '0');
4590 }
4591 }
4592 if (c == '.') {
4593 prec = 0;
4594 if (--fmtcnt >= 0)
4595 c = *fmt++;
4596 if (c == '*') {
4597 v = getnextarg(args, arglen, &argidx);
4598 if (v == NULL)
4599 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004600 if (!PyInt_Check(v)) {
4601 PyErr_SetString(
4602 PyExc_TypeError,
4603 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004604 goto error;
4605 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004606 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004607 if (prec < 0)
4608 prec = 0;
4609 if (--fmtcnt >= 0)
4610 c = *fmt++;
4611 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004612 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004613 prec = c - '0';
4614 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004615 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004616 if (!isdigit(c))
4617 break;
4618 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004619 PyErr_SetString(
4620 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004621 "prec too big");
4622 goto error;
4623 }
4624 prec = prec*10 + (c - '0');
4625 }
4626 }
4627 } /* prec */
4628 if (fmtcnt >= 0) {
4629 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004630 if (--fmtcnt >= 0)
4631 c = *fmt++;
4632 }
4633 }
4634 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004635 PyErr_SetString(PyExc_ValueError,
4636 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004637 goto error;
4638 }
4639 if (c != '%') {
4640 v = getnextarg(args, arglen, &argidx);
4641 if (v == NULL)
4642 goto error;
4643 }
4644 sign = 0;
4645 fill = ' ';
4646 switch (c) {
4647 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004648 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004649 len = 1;
4650 break;
4651 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004652#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004653 if (PyUnicode_Check(v)) {
4654 fmt = fmt_start;
4655 argidx = argidx_start;
4656 goto unicode;
4657 }
Georg Brandld45014b2005-10-01 17:06:00 +00004658#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004659 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004660#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004661 if (temp != NULL && PyUnicode_Check(temp)) {
4662 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004663 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004664 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004665 goto unicode;
4666 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004667#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004668 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004669 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004670 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004671 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004672 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004673 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004674 if (!PyString_Check(temp)) {
4675 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004676 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004677 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004678 goto error;
4679 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004680 pbuf = PyString_AS_STRING(temp);
4681 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004682 if (prec >= 0 && len > prec)
4683 len = prec;
4684 break;
4685 case 'i':
4686 case 'd':
4687 case 'u':
4688 case 'o':
4689 case 'x':
4690 case 'X':
4691 if (c == 'i')
4692 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004693 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004694 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004695 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004696 prec, c, &pbuf, &ilen);
4697 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004698 if (!temp)
4699 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004700 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004701 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004702 else {
4703 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004704 len = formatint(pbuf,
4705 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004706 flags, prec, c, v);
4707 if (len < 0)
4708 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004709 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004710 }
4711 if (flags & F_ZERO)
4712 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004713 break;
4714 case 'e':
4715 case 'E':
4716 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004717 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004718 case 'g':
4719 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004720 if (c == 'F')
4721 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004722 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004723 len = formatfloat(pbuf, sizeof(formatbuf),
4724 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004725 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004726 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004727 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004728 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004729 fill = '0';
4730 break;
4731 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004732#ifdef Py_USING_UNICODE
4733 if (PyUnicode_Check(v)) {
4734 fmt = fmt_start;
4735 argidx = argidx_start;
4736 goto unicode;
4737 }
4738#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004739 pbuf = formatbuf;
4740 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004741 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004742 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004743 break;
4744 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004745 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004746 "unsupported format character '%c' (0x%x) "
4747 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004748 c, c,
4749 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004750 goto error;
4751 }
4752 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004753 if (*pbuf == '-' || *pbuf == '+') {
4754 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004755 len--;
4756 }
4757 else if (flags & F_SIGN)
4758 sign = '+';
4759 else if (flags & F_BLANK)
4760 sign = ' ';
4761 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004762 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004763 }
4764 if (width < len)
4765 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004766 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004767 reslen -= rescnt;
4768 rescnt = width + fmtcnt + 100;
4769 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004770 if (reslen < 0) {
4771 Py_DECREF(result);
4772 return PyErr_NoMemory();
4773 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004774 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004775 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004776 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004777 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004778 }
4779 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004780 if (fill != ' ')
4781 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004782 rescnt--;
4783 if (width > len)
4784 width--;
4785 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004786 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4787 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004788 assert(pbuf[1] == c);
4789 if (fill != ' ') {
4790 *res++ = *pbuf++;
4791 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004792 }
Tim Petersfff53252001-04-12 18:38:48 +00004793 rescnt -= 2;
4794 width -= 2;
4795 if (width < 0)
4796 width = 0;
4797 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004798 }
4799 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004800 do {
4801 --rescnt;
4802 *res++ = fill;
4803 } while (--width > len);
4804 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004805 if (fill == ' ') {
4806 if (sign)
4807 *res++ = sign;
4808 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004809 (c == 'x' || c == 'X')) {
4810 assert(pbuf[0] == '0');
4811 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004812 *res++ = *pbuf++;
4813 *res++ = *pbuf++;
4814 }
4815 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004816 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004817 res += len;
4818 rescnt -= len;
4819 while (--width >= len) {
4820 --rescnt;
4821 *res++ = ' ';
4822 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004823 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004824 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004825 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004826 goto error;
4827 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004828 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004829 } /* '%' */
4830 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004831 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004832 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004833 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004834 goto error;
4835 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004836 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004837 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004838 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004839 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004840 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004841
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004842#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004843 unicode:
4844 if (args_owned) {
4845 Py_DECREF(args);
4846 args_owned = 0;
4847 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004848 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004849 if (PyTuple_Check(orig_args) && argidx > 0) {
4850 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004851 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004852 v = PyTuple_New(n);
4853 if (v == NULL)
4854 goto error;
4855 while (--n >= 0) {
4856 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4857 Py_INCREF(w);
4858 PyTuple_SET_ITEM(v, n, w);
4859 }
4860 args = v;
4861 } else {
4862 Py_INCREF(orig_args);
4863 args = orig_args;
4864 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004865 args_owned = 1;
4866 /* Take what we have of the result and let the Unicode formatting
4867 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004868 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004869 if (_PyString_Resize(&result, rescnt))
4870 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004871 fmtcnt = PyString_GET_SIZE(format) - \
4872 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004873 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4874 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004875 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004876 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004877 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004878 if (v == NULL)
4879 goto error;
4880 /* Paste what we have (result) to what the Unicode formatting
4881 function returned (v) and return the result (or error) */
4882 w = PyUnicode_Concat(result, v);
4883 Py_DECREF(result);
4884 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004885 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004886 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004887#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004888
Guido van Rossume5372401993-03-16 12:15:04 +00004889 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004890 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004891 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004892 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004893 }
Guido van Rossume5372401993-03-16 12:15:04 +00004894 return NULL;
4895}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004896
Guido van Rossum2a61e741997-01-18 07:55:05 +00004897void
Fred Drakeba096332000-07-09 07:04:36 +00004898PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004899{
4900 register PyStringObject *s = (PyStringObject *)(*p);
4901 PyObject *t;
4902 if (s == NULL || !PyString_Check(s))
4903 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004904 /* If it's a string subclass, we don't really know what putting
4905 it in the interned dict might do. */
4906 if (!PyString_CheckExact(s))
4907 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004908 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004909 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004910 if (interned == NULL) {
4911 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004912 if (interned == NULL) {
4913 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004914 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004915 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004916 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004917 t = PyDict_GetItem(interned, (PyObject *)s);
4918 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004919 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004920 Py_DECREF(*p);
4921 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004922 return;
4923 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004924
Armin Rigo79f7ad22004-08-07 19:27:39 +00004925 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004926 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004927 return;
4928 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004929 /* The two references in interned are not counted by refcnt.
4930 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004931 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004932 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004933}
4934
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004935void
4936PyString_InternImmortal(PyObject **p)
4937{
4938 PyString_InternInPlace(p);
4939 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4940 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4941 Py_INCREF(*p);
4942 }
4943}
4944
Guido van Rossum2a61e741997-01-18 07:55:05 +00004945
4946PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004947PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004948{
4949 PyObject *s = PyString_FromString(cp);
4950 if (s == NULL)
4951 return NULL;
4952 PyString_InternInPlace(&s);
4953 return s;
4954}
4955
Guido van Rossum8cf04761997-08-02 02:57:45 +00004956void
Fred Drakeba096332000-07-09 07:04:36 +00004957PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004958{
4959 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004960 for (i = 0; i < UCHAR_MAX + 1; i++) {
4961 Py_XDECREF(characters[i]);
4962 characters[i] = NULL;
4963 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004964 Py_XDECREF(nullstring);
4965 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004966}
Barry Warsawa903ad982001-02-23 16:40:48 +00004967
Barry Warsawa903ad982001-02-23 16:40:48 +00004968void _Py_ReleaseInternedStrings(void)
4969{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004970 PyObject *keys;
4971 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004972 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004973
4974 if (interned == NULL || !PyDict_Check(interned))
4975 return;
4976 keys = PyDict_Keys(interned);
4977 if (keys == NULL || !PyList_Check(keys)) {
4978 PyErr_Clear();
4979 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004980 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004981
4982 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4983 detector, interned strings are not forcibly deallocated; rather, we
4984 give them their stolen references back, and then clear and DECREF
4985 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004986
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004987 fprintf(stderr, "releasing interned strings\n");
4988 n = PyList_GET_SIZE(keys);
4989 for (i = 0; i < n; i++) {
4990 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4991 switch (s->ob_sstate) {
4992 case SSTATE_NOT_INTERNED:
4993 /* XXX Shouldn't happen */
4994 break;
4995 case SSTATE_INTERNED_IMMORTAL:
4996 s->ob_refcnt += 1;
4997 break;
4998 case SSTATE_INTERNED_MORTAL:
4999 s->ob_refcnt += 2;
5000 break;
5001 default:
5002 Py_FatalError("Inconsistent interned string state.");
5003 }
5004 s->ob_sstate = SSTATE_NOT_INTERNED;
5005 }
5006 Py_DECREF(keys);
5007 PyDict_Clear(interned);
5008 Py_DECREF(interned);
5009 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005010}