blob: 8d0f4b81786c63ce3ed7c9b509c7de37eab6bf5e [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000417 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000490 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000510 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000522 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000609 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 && isxdigit(Py_CHARMASK(s[1]))) {
611 unsigned int x = 0;
612 c = Py_CHARMASK(*s);
613 s++;
614 if (isdigit(c))
615 x = c - '0';
616 else if (islower(c))
617 x = 10 + c - 'a';
618 else
619 x = 10 + c - 'A';
620 x = x << 4;
621 c = Py_CHARMASK(*s);
622 s++;
623 if (isdigit(c))
624 x += c - '0';
625 else if (islower(c))
626 x += 10 + c - 'a';
627 else
628 x += 10 + c - 'A';
629 *p++ = x;
630 break;
631 }
632 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000633 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000635 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000636 }
637 if (strcmp(errors, "replace") == 0) {
638 *p++ = '?';
639 } else if (strcmp(errors, "ignore") == 0)
640 /* do nothing */;
641 else {
642 PyErr_Format(PyExc_ValueError,
643 "decoding error; "
644 "unknown error handling code: %.400s",
645 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000646 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 default:
649 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000650 s--;
651 goto non_esc; /* an arbitry number of unescaped
652 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 }
654 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000655 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 return v;
658 failed:
659 Py_DECREF(v);
660 return NULL;
661}
662
Thomas Wouters477c8d52006-05-27 19:21:47 +0000663/* -------------------------------------------------------------------- */
664/* object api */
665
Martin v. Löwis18e16552006-02-15 17:27:45 +0000666static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667string_getsize(register PyObject *op)
668{
669 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000670 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000671 if (PyString_AsStringAndSize(op, &s, &len))
672 return -1;
673 return len;
674}
675
676static /*const*/ char *
677string_getbuffer(register PyObject *op)
678{
679 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000680 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000681 if (PyString_AsStringAndSize(op, &s, &len))
682 return NULL;
683 return s;
684}
685
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000687PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000688{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000689 if (PyUnicode_Check(op)) {
690 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
691 if (!op)
692 return -1;
693 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694 if (!PyString_Check(op))
695 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000696 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697}
698
699/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000700PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000702 if (PyUnicode_Check(op)) {
703 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
704 if (!op)
705 return NULL;
706 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (!PyString_Check(op))
708 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000709 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000710}
711
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712int
713PyString_AsStringAndSize(register PyObject *obj,
714 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716{
717 if (s == NULL) {
718 PyErr_BadInternalCall();
719 return -1;
720 }
721
722 if (!PyString_Check(obj)) {
723 if (PyUnicode_Check(obj)) {
724 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
725 if (obj == NULL)
726 return -1;
727 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000728 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000729 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000731 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000732 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733 return -1;
734 }
735 }
736
737 *s = PyString_AS_STRING(obj);
738 if (len != NULL)
739 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000740 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 PyErr_SetString(PyExc_TypeError,
742 "expected string without null bytes");
743 return -1;
744 }
745 return 0;
746}
747
Thomas Wouters477c8d52006-05-27 19:21:47 +0000748/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749/* Methods */
750
Thomas Wouters477c8d52006-05-27 19:21:47 +0000751#define STRINGLIB_CHAR char
752
753#define STRINGLIB_CMP memcmp
754#define STRINGLIB_LEN PyString_GET_SIZE
755#define STRINGLIB_NEW PyString_FromStringAndSize
756#define STRINGLIB_STR PyString_AS_STRING
757
758#define STRINGLIB_EMPTY nullstring
759
760#include "stringlib/fastsearch.h"
761
762#include "stringlib/count.h"
763#include "stringlib/find.h"
764#include "stringlib/partition.h"
765
766
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000767PyObject *
768PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000770 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000771 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis5b222132007-06-10 09:51:05 +0000772 Py_ssize_t length = PyString_GET_SIZE(op);
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000773 size_t newsize = 3 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000774 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000775 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000776 PyErr_SetString(PyExc_OverflowError,
777 "string is too large to make repr");
778 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000779 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000780 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000781 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782 }
783 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000784 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000785 register Py_UNICODE c;
786 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000787 int quote;
788
Thomas Wouters7e474022000-07-16 12:04:32 +0000789 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000790 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000791 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000792 char *test, *start;
793 start = PyString_AS_STRING(op);
794 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000795 if (*test == '"') {
796 quote = '\''; /* switch back to single quote */
797 goto decided;
798 }
799 else if (*test == '\'')
800 quote = '"';
801 }
802 decided:
803 ;
804 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000805
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000806 *p++ = 's', *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000807 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000808 /* There's at least enough room for a hex escape
809 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000810 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000812 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000814 else if (c == '\t')
815 *p++ = '\\', *p++ = 't';
816 else if (c == '\n')
817 *p++ = '\\', *p++ = 'n';
818 else if (c == '\r')
819 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000820 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000821 *p++ = '\\';
822 *p++ = 'x';
823 *p++ = hexdigits[(c & 0xf0) >> 4];
824 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000825 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 else
827 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000829 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000832 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
833 Py_DECREF(v);
834 return NULL;
835 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000836 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838}
839
Guido van Rossum189f1df2001-05-01 16:51:53 +0000840static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841string_repr(PyObject *op)
842{
843 return PyString_Repr(op, 1);
844}
845
846static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000847string_str(PyObject *s)
848{
Tim Petersc9933152001-10-16 20:18:24 +0000849 assert(PyString_Check(s));
850 if (PyString_CheckExact(s)) {
851 Py_INCREF(s);
852 return s;
853 }
854 else {
855 /* Subtype -- return genuine string with the same value. */
856 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000857 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000858 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000859}
860
Martin v. Löwis18e16552006-02-15 17:27:45 +0000861static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000862string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000863{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000864 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865}
866
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000867static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000868string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000870 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000871 register PyStringObject *op;
872 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000873 if (PyUnicode_Check(bb))
874 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000875 if (PyBytes_Check(bb))
876 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000877 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000878 "cannot concatenate 'str8' and '%.200s' objects",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000879 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880 return NULL;
881 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000882#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000883 /* Optimize cases with empty left or right operand */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000884 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000885 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000886 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000887 Py_INCREF(bb);
888 return bb;
889 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890 Py_INCREF(a);
891 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000893 size = Py_Size(a) + Py_Size(b);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000894 if (size < 0) {
895 PyErr_SetString(PyExc_OverflowError,
896 "strings are too large to concat");
897 return NULL;
898 }
899
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000900 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000901 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000902 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000903 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000904 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000905 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000906 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000907 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
908 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000909 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000910 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911#undef b
912}
913
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000915string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000917 register Py_ssize_t i;
918 register Py_ssize_t j;
919 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000920 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000921 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922 if (n < 0)
923 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000924 /* watch out for overflows: the size can overflow int,
925 * and the # of bytes needed can overflow size_t
926 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000927 size = Py_Size(a) * n;
928 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000929 PyErr_SetString(PyExc_OverflowError,
930 "repeated string is too long");
931 return NULL;
932 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000933 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 Py_INCREF(a);
935 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936 }
Tim Peterse7c05322004-06-27 17:24:49 +0000937 nbytes = (size_t)size;
938 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000939 PyErr_SetString(PyExc_OverflowError,
940 "repeated string is too long");
941 return NULL;
942 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000943 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000944 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000945 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000947 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000948 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000949 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000950 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000951 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000952 memset(op->ob_sval, a->ob_sval[0] , n);
953 return (PyObject *) op;
954 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000955 i = 0;
956 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000957 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
958 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000959 }
960 while (i < size) {
961 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000962 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000963 i += j;
964 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966}
967
968/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
969
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000970static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000971string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000972 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +0000973 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974{
975 if (i < 0)
976 i = 0;
977 if (j < 0)
978 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000979 if (j > Py_Size(a))
980 j = Py_Size(a);
981 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000982 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983 Py_INCREF(a);
984 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985 }
986 if (j < i)
987 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000988 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989}
990
Guido van Rossum9284a572000-03-07 15:53:43 +0000991static int
Thomas Wouters477c8d52006-05-27 19:21:47 +0000992string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +0000993{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000994 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000995 if (PyUnicode_Check(sub_obj))
996 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000997 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000998 PyErr_Format(PyExc_TypeError,
999 "'in <string>' requires string as left operand, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001000 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001001 return -1;
1002 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001003 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001004
Thomas Wouters477c8d52006-05-27 19:21:47 +00001005 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001006}
1007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001009string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001011 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001012 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001013 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001014 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001015 return NULL;
1016 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001017 pchar = a->ob_sval[i];
1018 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001019 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001020 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001021 else {
1022#ifdef COUNT_ALLOCS
1023 one_strings++;
1024#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001025 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001026 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001027 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028}
1029
Martin v. Löwiscd353062001-05-24 16:56:35 +00001030static PyObject*
1031string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001032{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001033 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 Py_ssize_t len_a, len_b;
1035 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001036 PyObject *result;
1037
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001038 /* Make sure both arguments are strings. */
1039 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001040 result = Py_NotImplemented;
1041 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001042 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001043 if (a == b) {
1044 switch (op) {
1045 case Py_EQ:case Py_LE:case Py_GE:
1046 result = Py_True;
1047 goto out;
1048 case Py_NE:case Py_LT:case Py_GT:
1049 result = Py_False;
1050 goto out;
1051 }
1052 }
1053 if (op == Py_EQ) {
1054 /* Supporting Py_NE here as well does not save
1055 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001056 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001057 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001058 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001059 result = Py_True;
1060 } else {
1061 result = Py_False;
1062 }
1063 goto out;
1064 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001065 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001066 min_len = (len_a < len_b) ? len_a : len_b;
1067 if (min_len > 0) {
1068 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1069 if (c==0)
1070 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001071 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001072 c = 0;
1073 if (c == 0)
1074 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1075 switch (op) {
1076 case Py_LT: c = c < 0; break;
1077 case Py_LE: c = c <= 0; break;
1078 case Py_EQ: assert(0); break; /* unreachable */
1079 case Py_NE: c = c != 0; break;
1080 case Py_GT: c = c > 0; break;
1081 case Py_GE: c = c >= 0; break;
1082 default:
1083 result = Py_NotImplemented;
1084 goto out;
1085 }
1086 result = c ? Py_True : Py_False;
1087 out:
1088 Py_INCREF(result);
1089 return result;
1090}
1091
1092int
1093_PyString_Eq(PyObject *o1, PyObject *o2)
1094{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001095 PyStringObject *a = (PyStringObject*) o1;
1096 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001097 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001098 && *a->ob_sval == *b->ob_sval
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001099 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001100}
1101
Guido van Rossum9bfef441993-03-29 10:43:31 +00001102static long
Fred Drakeba096332000-07-09 07:04:36 +00001103string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001104{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001105 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001106 register unsigned char *p;
1107 register long x;
1108
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001109 if (a->ob_shash != -1)
1110 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001111 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001112 p = (unsigned char *) a->ob_sval;
1113 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001114 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001115 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001116 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001117 if (x == -1)
1118 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001119 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001120 return x;
1121}
1122
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001123static PyObject*
1124string_subscript(PyStringObject* self, PyObject* item)
1125{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001126 if (PyIndex_Check(item)) {
1127 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001128 if (i == -1 && PyErr_Occurred())
1129 return NULL;
1130 if (i < 0)
1131 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001132 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001133 }
1134 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001135 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001136 char* source_buf;
1137 char* result_buf;
1138 PyObject* result;
1139
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001140 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001141 PyString_GET_SIZE(self),
1142 &start, &stop, &step, &slicelength) < 0) {
1143 return NULL;
1144 }
1145
1146 if (slicelength <= 0) {
1147 return PyString_FromStringAndSize("", 0);
1148 }
Thomas Woutersed03b412007-08-28 21:37:11 +00001149 else if (start == 0 && step == 1 &&
1150 slicelength == PyString_GET_SIZE(self) &&
1151 PyString_CheckExact(self)) {
1152 Py_INCREF(self);
1153 return (PyObject *)self;
1154 }
1155 else if (step == 1) {
1156 return PyString_FromStringAndSize(
1157 PyString_AS_STRING(self) + start,
1158 slicelength);
1159 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001160 else {
1161 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001162 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001163 if (result_buf == NULL)
1164 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001165
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001166 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001167 cur += step, i++) {
1168 result_buf[i] = source_buf[cur];
1169 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001170
1171 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001172 slicelength);
1173 PyMem_Free(result_buf);
1174 return result;
1175 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001176 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001177 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001178 PyErr_Format(PyExc_TypeError,
1179 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001180 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001181 return NULL;
1182 }
1183}
1184
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001185static int
1186string_buffer_getbuffer(PyStringObject *self, PyBuffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001187{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001188 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +00001189}
1190
/* Sequence protocol table for string objects.  Entries are positional;
   the two assignment slots are 0 because no item or slice assignment
   handler is provided. */
static PySequenceMethods string_as_sequence = {
	(lenfunc)string_length, /*sq_length*/
	(binaryfunc)string_concat, /*sq_concat*/
	(ssizeargfunc)string_repeat, /*sq_repeat*/
	(ssizeargfunc)string_item, /*sq_item*/
	(ssizessizeargfunc)string_slice, /*sq_slice*/
	0,		/*sq_ass_item*/
	0,		/*sq_ass_slice*/
	(objobjproc)string_contains /*sq_contains*/
};
1201
/* Mapping protocol table for string objects: length and subscript
   handlers only.  The third entry is 0 (per the PyMappingMethods
   layout this is the subscript-assignment slot). */
static PyMappingMethods string_as_mapping = {
	(lenfunc)string_length,
	(binaryfunc)string_subscript,
	0,
};
1207
/* Buffer protocol table for string objects: only a get-buffer handler
   is supplied; the second entry is NULL.
   NOTE(review): the NULL entry is presumably the release-buffer slot --
   confirm against the PyBufferProcs definition. */
static PyBufferProcs string_as_buffer = {
	(getbufferproc)string_buffer_getbuffer,
	NULL,
};
1212
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213
/* Selector values for the three strip variants; they double as indices
   into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* The method name alone: skips the leading "|O:" (3 characters) of the
   format string. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001222
Thomas Wouters477c8d52006-05-27 19:21:47 +00001223
/* True when `length` bytes of `target` starting at `offset` equal
   `pattern`.  The first and last bytes are compared directly and only
   the interior is passed to memcmp (length-2 bytes).
   Don't call if length < 2: the interior length would be negative.
   Note that the arguments are not parenthesized in the expansion and
   are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)	\
  (target[offset] == pattern[0] &&				\
   target[offset+length-1] == pattern[length-1] &&		\
   !memcmp(target+offset+1, pattern+1, length-2) )
1229
1230
1231/* Overallocate the initial list to reduce the number of reallocs for small
1232 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1233 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1234 text (roughly 11 words per line) and field delimited data (usually 1-10
1235 fields). For large strings the split algorithms are bandwidth limited
1236 so increasing the preallocation likely will not improve things.*/
1237
/* Upper bound on the number of list slots preallocated for a split. */
#define MAX_PREALLOC 12

/* Initial list size for a given maxsplit: maxsplit+1 elements, capped
   at MAX_PREALLOC. */
/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
	(maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
1243
/* Create the string data[left:right] and append it to the list.
   Relies on names from the enclosing function: the variables `str` and
   `list` and the label `onError`, which is jumped to on any failure.
   The expansion is a bare statement sequence (not wrapped in a block),
   so it must not be used as the unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (PyList_Append(list, str)) {				\
		Py_DECREF(str);					\
		goto onError;					\
	}							\
	else							\
		Py_DECREF(str);
1255
/* Create the string data[left:right] and store it as element `count`
   of the list, then increment `count`.  While count < MAX_PREALLOC the
   item is placed directly with PyList_SET_ITEM; after that it is
   appended with PyList_Append and the local reference is released.
   Relies on names from the enclosing function: the variables `str`,
   `list` and `count`, and the label `onError`, which is jumped to on
   any failure. */
#define SPLIT_ADD(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (count < MAX_PREALLOC) {				\
		PyList_SET_ITEM(list, count, str);		\
	} else {						\
		if (PyList_Append(list, str)) {			\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		else						\
			Py_DECREF(str);				\
	}							\
	count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001272
/* Always force the list to the expected size. */
/* Writes `count` (from the enclosing function) straight into the
   list's size field. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Scanning helpers.  Each advances (or, for the R* forms, retreats)
   the index variable `i` in place while the byte at s[i] is / is not
   whitespace according to isspace(), stopping at `len` going forward
   or below 0 going backward. */
#define SKIP_SPACE(s, i, len)    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1280
1281Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001282split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001283{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001284 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001285 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001286 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001287
1288 if (list == NULL)
1289 return NULL;
1290
Thomas Wouters477c8d52006-05-27 19:21:47 +00001291 i = j = 0;
1292
1293 while (maxsplit-- > 0) {
1294 SKIP_SPACE(s, i, len);
1295 if (i==len) break;
1296 j = i; i++;
1297 SKIP_NONSPACE(s, i, len);
1298 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001300
1301 if (i < len) {
1302 /* Only occurs when maxsplit was reached */
1303 /* Skip any remaining whitespace and copy to end of string */
1304 SKIP_SPACE(s, i, len);
1305 if (i != len)
1306 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001308 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001310 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001311 Py_DECREF(list);
1312 return NULL;
1313}
1314
Thomas Wouters477c8d52006-05-27 19:21:47 +00001315Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001316split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001317{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001318 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001319 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001320 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001321
1322 if (list == NULL)
1323 return NULL;
1324
Thomas Wouters477c8d52006-05-27 19:21:47 +00001325 i = j = 0;
1326 while ((j < len) && (maxcount-- > 0)) {
1327 for(; j<len; j++) {
1328 /* I found that using memchr makes no difference */
1329 if (s[j] == ch) {
1330 SPLIT_ADD(s, i, j);
1331 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001332 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001333 }
1334 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001335 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001336 if (i <= len) {
1337 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001338 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001339 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001340 return list;
1341
1342 onError:
1343 Py_DECREF(list);
1344 return NULL;
1345}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346
/* Docstring for the split() method implemented by string_split(). */
PyDoc_STRVAR(split__doc__,
"S.split([sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string. If maxsplit is given, at most maxsplit\n\
splits are done. If sep is not specified or is None, any\n\
whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354
1355static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001356string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001357{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001358 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001359 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001360 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001361 PyObject *list, *str, *subobj = Py_None;
1362#ifdef USE_FAST
1363 Py_ssize_t pos;
1364#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001366 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001368 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001369 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001370 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001372 if (PyString_Check(subobj)) {
1373 sub = PyString_AS_STRING(subobj);
1374 n = PyString_GET_SIZE(subobj);
1375 }
1376 else if (PyUnicode_Check(subobj))
1377 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1378 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1379 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001380
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001381 if (n == 0) {
1382 PyErr_SetString(PyExc_ValueError, "empty separator");
1383 return NULL;
1384 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001385 else if (n == 1)
1386 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387
Thomas Wouters477c8d52006-05-27 19:21:47 +00001388 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 if (list == NULL)
1390 return NULL;
1391
Thomas Wouters477c8d52006-05-27 19:21:47 +00001392#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001394 while (maxsplit-- > 0) {
1395 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1396 if (pos < 0)
1397 break;
1398 j = i+pos;
1399 SPLIT_ADD(s, i, j);
1400 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001402#else
1403 i = j = 0;
1404 while ((j+n <= len) && (maxsplit-- > 0)) {
1405 for (; j+n <= len; j++) {
1406 if (Py_STRING_MATCH(s, j, sub, n)) {
1407 SPLIT_ADD(s, i, j);
1408 i = j = j + n;
1409 break;
1410 }
1411 }
1412 }
1413#endif
1414 SPLIT_ADD(s, i, len);
1415 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001416 return list;
1417
Thomas Wouters477c8d52006-05-27 19:21:47 +00001418 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419 Py_DECREF(list);
1420 return NULL;
1421}
1422
/* Docstring for the partition() method implemented by
   string_partition(). */
PyDoc_STRVAR(partition__doc__,
"S.partition(sep) -> (head, sep, tail)\n\
\n\
Searches for the separator sep in S, and returns the part before it,\n\
the separator itself, and the part after it. If the separator is not\n\
found, returns S and two empty strings.");
1429
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001430static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001431string_partition(PyStringObject *self, PyObject *sep_obj)
1432{
1433 const char *sep;
1434 Py_ssize_t sep_len;
1435
1436 if (PyString_Check(sep_obj)) {
1437 sep = PyString_AS_STRING(sep_obj);
1438 sep_len = PyString_GET_SIZE(sep_obj);
1439 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001440 else if (PyUnicode_Check(sep_obj))
1441 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001442 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1443 return NULL;
1444
1445 return stringlib_partition(
1446 (PyObject*) self,
1447 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1448 sep_obj, sep, sep_len
1449 );
1450}
1451
1452PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001453"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001454\n\
1455Searches for the separator sep in S, starting at the end of S, and returns\n\
1456the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001457separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001458
1459static PyObject *
1460string_rpartition(PyStringObject *self, PyObject *sep_obj)
1461{
1462 const char *sep;
1463 Py_ssize_t sep_len;
1464
1465 if (PyString_Check(sep_obj)) {
1466 sep = PyString_AS_STRING(sep_obj);
1467 sep_len = PyString_GET_SIZE(sep_obj);
1468 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001469 else if (PyUnicode_Check(sep_obj))
1470 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001471 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1472 return NULL;
1473
1474 return stringlib_rpartition(
1475 (PyObject*) self,
1476 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1477 sep_obj, sep, sep_len
1478 );
1479}
1480
1481Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001482rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001483{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001484 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001485 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001486 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001487
1488 if (list == NULL)
1489 return NULL;
1490
Thomas Wouters477c8d52006-05-27 19:21:47 +00001491 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001492
Thomas Wouters477c8d52006-05-27 19:21:47 +00001493 while (maxsplit-- > 0) {
1494 RSKIP_SPACE(s, i);
1495 if (i<0) break;
1496 j = i; i--;
1497 RSKIP_NONSPACE(s, i);
1498 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001499 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001500 if (i >= 0) {
1501 /* Only occurs when maxsplit was reached */
1502 /* Skip any remaining whitespace and copy to beginning of string */
1503 RSKIP_SPACE(s, i);
1504 if (i >= 0)
1505 SPLIT_ADD(s, 0, i + 1);
1506
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001507 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001508 FIX_PREALLOC_SIZE(list);
1509 if (PyList_Reverse(list) < 0)
1510 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001511 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001512 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001513 Py_DECREF(list);
1514 return NULL;
1515}
1516
Thomas Wouters477c8d52006-05-27 19:21:47 +00001517Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001518rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001519{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001520 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001521 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001522 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001523
1524 if (list == NULL)
1525 return NULL;
1526
Thomas Wouters477c8d52006-05-27 19:21:47 +00001527 i = j = len - 1;
1528 while ((i >= 0) && (maxcount-- > 0)) {
1529 for (; i >= 0; i--) {
1530 if (s[i] == ch) {
1531 SPLIT_ADD(s, i + 1, j + 1);
1532 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001533 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001534 }
1535 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001536 }
1537 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001538 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001539 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001540 FIX_PREALLOC_SIZE(list);
1541 if (PyList_Reverse(list) < 0)
1542 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001543 return list;
1544
1545 onError:
1546 Py_DECREF(list);
1547 return NULL;
1548}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001549
1550PyDoc_STRVAR(rsplit__doc__,
1551"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1552\n\
1553Return a list of the words in the string S, using sep as the\n\
1554delimiter string, starting at the end of the string and working\n\
1555to the front. If maxsplit is given, at most maxsplit splits are\n\
1556done. If sep is not specified or is None, any whitespace string\n\
1557is a separator.");
1558
1559static PyObject *
1560string_rsplit(PyStringObject *self, PyObject *args)
1561{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001562 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001563 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001564 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001565 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001566
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001567 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001568 return NULL;
1569 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001570 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001571 if (subobj == Py_None)
1572 return rsplit_whitespace(s, len, maxsplit);
1573 if (PyString_Check(subobj)) {
1574 sub = PyString_AS_STRING(subobj);
1575 n = PyString_GET_SIZE(subobj);
1576 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001577 else if (PyUnicode_Check(subobj))
1578 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001579 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1580 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001581
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001582 if (n == 0) {
1583 PyErr_SetString(PyExc_ValueError, "empty separator");
1584 return NULL;
1585 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001586 else if (n == 1)
1587 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001588
Thomas Wouters477c8d52006-05-27 19:21:47 +00001589 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001590 if (list == NULL)
1591 return NULL;
1592
1593 j = len;
1594 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001595
Thomas Wouters477c8d52006-05-27 19:21:47 +00001596 while ( (i >= 0) && (maxsplit-- > 0) ) {
1597 for (; i>=0; i--) {
1598 if (Py_STRING_MATCH(s, i, sub, n)) {
1599 SPLIT_ADD(s, i + n, j);
1600 j = i;
1601 i -= n;
1602 break;
1603 }
1604 }
1605 }
1606 SPLIT_ADD(s, 0, j);
1607 FIX_PREALLOC_SIZE(list);
1608 if (PyList_Reverse(list) < 0)
1609 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001610 return list;
1611
Thomas Wouters477c8d52006-05-27 19:21:47 +00001612onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001613 Py_DECREF(list);
1614 return NULL;
1615}
1616
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001618PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619"S.join(sequence) -> string\n\
1620\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001622sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623
1624static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001625string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626{
1627 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001628 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001631 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001632 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001633 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001634 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635
Tim Peters19fe14e2001-01-19 03:03:47 +00001636 seq = PySequence_Fast(orig, "");
1637 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638 return NULL;
1639 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001640
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001641 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001642 if (seqlen == 0) {
1643 Py_DECREF(seq);
1644 return PyString_FromString("");
1645 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001647 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001648 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1649 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001650 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001651 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001652 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001654
Raymond Hettinger674f2412004-08-23 23:23:54 +00001655 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001656 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001657 * Do a pre-pass to figure out the total amount of space we'll
1658 * need (sz), see whether any argument is absurd, and defer to
1659 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001660 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001661 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001662 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001663 item = PySequence_Fast_GET_ITEM(seq, i);
1664 if (!PyString_Check(item)){
1665 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001666 /* Defer to Unicode join.
1667 * CAUTION: There's no gurantee that the
1668 * original sequence can be iterated over
1669 * again, so we must pass seq here.
1670 */
1671 PyObject *result;
1672 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001673 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001674 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001675 }
1676 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001677 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001678 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001679 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001680 Py_DECREF(seq);
1681 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001682 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001683 sz += PyString_GET_SIZE(item);
1684 if (i != 0)
1685 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001686 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001687 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001688 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001689 Py_DECREF(seq);
1690 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001692 }
1693
1694 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001695 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001696 if (res == NULL) {
1697 Py_DECREF(seq);
1698 return NULL;
1699 }
1700
1701 /* Catenate everything. */
1702 p = PyString_AS_STRING(res);
1703 for (i = 0; i < seqlen; ++i) {
1704 size_t n;
1705 item = PySequence_Fast_GET_ITEM(seq, i);
1706 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001707 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001708 p += n;
1709 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001710 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001711 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001712 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001713 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001714
Jeremy Hylton49048292000-07-11 03:28:17 +00001715 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001716 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717}
1718
Tim Peters52e155e2001-06-16 05:42:57 +00001719PyObject *
1720_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001721{
Tim Petersa7259592001-06-16 05:11:17 +00001722 assert(sep != NULL && PyString_Check(sep));
1723 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001724 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001725}
1726
Thomas Wouters477c8d52006-05-27 19:21:47 +00001727Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001728string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001729{
1730 if (*end > len)
1731 *end = len;
1732 else if (*end < 0)
1733 *end += len;
1734 if (*end < 0)
1735 *end = 0;
1736 if (*start < 0)
1737 *start += len;
1738 if (*start < 0)
1739 *start = 0;
1740}
1741
Thomas Wouters477c8d52006-05-27 19:21:47 +00001742Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001743string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001746 const char *sub;
1747 Py_ssize_t sub_len;
1748 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749
Thomas Wouters477c8d52006-05-27 19:21:47 +00001750 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1751 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752 return -2;
1753 if (PyString_Check(subobj)) {
1754 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001755 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001756 }
1757 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001758 return PyUnicode_Find(
1759 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001760 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001761 /* XXX - the "expected a character buffer object" is pretty
1762 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763 return -2;
1764
Thomas Wouters477c8d52006-05-27 19:21:47 +00001765 if (dir > 0)
1766 return stringlib_find_slice(
1767 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1768 sub, sub_len, start, end);
1769 else
1770 return stringlib_rfind_slice(
1771 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1772 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773}
1774
1775
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001776PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777"S.find(sub [,start [,end]]) -> int\n\
1778\n\
1779Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001780such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781arguments start and end are interpreted as in slice notation.\n\
1782\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001783Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784
1785static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001786string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001788 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789 if (result == -2)
1790 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001791 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792}
1793
1794
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796"S.index(sub [,start [,end]]) -> int\n\
1797\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001798Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799
1800static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001801string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001803 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804 if (result == -2)
1805 return NULL;
1806 if (result == -1) {
1807 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001808 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809 return NULL;
1810 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001811 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812}
1813
1814
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001815PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816"S.rfind(sub [,start [,end]]) -> int\n\
1817\n\
1818Return the highest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001819such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820arguments start and end are interpreted as in slice notation.\n\
1821\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001822Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823
1824static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001825string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001827 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828 if (result == -2)
1829 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001830 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831}
1832
1833
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001834PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835"S.rindex(sub [,start [,end]]) -> int\n\
1836\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001837Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001838
1839static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001840string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001842 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843 if (result == -2)
1844 return NULL;
1845 if (result == -1) {
1846 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001847 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848 return NULL;
1849 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001850 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851}
1852
1853
Thomas Wouters477c8d52006-05-27 19:21:47 +00001854Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001855do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1856{
1857 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001858 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001859 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001860 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1861 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001862
1863 i = 0;
1864 if (striptype != RIGHTSTRIP) {
1865 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1866 i++;
1867 }
1868 }
1869
1870 j = len;
1871 if (striptype != LEFTSTRIP) {
1872 do {
1873 j--;
1874 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1875 j++;
1876 }
1877
1878 if (i == 0 && j == len && PyString_CheckExact(self)) {
1879 Py_INCREF(self);
1880 return (PyObject*)self;
1881 }
1882 else
1883 return PyString_FromStringAndSize(s+i, j-i);
1884}
1885
1886
Thomas Wouters477c8d52006-05-27 19:21:47 +00001887Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001888do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889{
1890 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001891 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893 i = 0;
1894 if (striptype != RIGHTSTRIP) {
1895 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1896 i++;
1897 }
1898 }
1899
1900 j = len;
1901 if (striptype != LEFTSTRIP) {
1902 do {
1903 j--;
1904 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1905 j++;
1906 }
1907
Tim Peters8fa5dd02001-09-12 02:18:30 +00001908 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909 Py_INCREF(self);
1910 return (PyObject*)self;
1911 }
1912 else
1913 return PyString_FromStringAndSize(s+i, j-i);
1914}
1915
1916
Thomas Wouters477c8d52006-05-27 19:21:47 +00001917Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001918do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1919{
1920 PyObject *sep = NULL;
1921
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001922 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001923 return NULL;
1924
1925 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001926 if (PyString_Check(sep))
1927 return do_xstrip(self, striptype, sep);
1928 else if (PyUnicode_Check(sep)) {
1929 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1930 PyObject *res;
1931 if (uniself==NULL)
1932 return NULL;
1933 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1934 striptype, sep);
1935 Py_DECREF(uniself);
1936 return res;
1937 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001938 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001939 "%s arg must be None or string",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001940 STRIPNAME(striptype));
1941 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001942 }
1943
1944 return do_strip(self, striptype);
1945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(strip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001949"S.strip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950\n\
1951Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001952whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001953If chars is given and not None, remove characters in chars instead.\n\
1954If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955
1956static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001957string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001959 if (PyTuple_GET_SIZE(args) == 0)
1960 return do_strip(self, BOTHSTRIP); /* Common case */
1961 else
1962 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963}
1964
1965
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001966PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001967"S.lstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001970If chars is given and not None, remove characters in chars instead.\n\
1971If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972
1973static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001974string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001976 if (PyTuple_GET_SIZE(args) == 0)
1977 return do_strip(self, LEFTSTRIP); /* Common case */
1978 else
1979 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980}
1981
1982
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001983PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001984"S.rstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001986Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001987If chars is given and not None, remove characters in chars instead.\n\
1988If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989
1990static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001991string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001993 if (PyTuple_GET_SIZE(args) == 0)
1994 return do_strip(self, RIGHTSTRIP); /* Common case */
1995 else
1996 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997}
1998
1999
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002000PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001"S.lower() -> string\n\
2002\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002003Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004
Thomas Wouters477c8d52006-05-27 19:21:47 +00002005/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2006#ifndef _tolower
2007#define _tolower tolower
2008#endif
2009
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002010static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002011string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002013 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002014 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002015 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002017 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002018 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002020
2021 s = PyString_AS_STRING(newobj);
2022
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002023 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002024
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002026 int c = Py_CHARMASK(s[i]);
2027 if (isupper(c))
2028 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002030
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002031 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032}
2033
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002034PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035"S.upper() -> string\n\
2036\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002037Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038
Thomas Wouters477c8d52006-05-27 19:21:47 +00002039#ifndef _toupper
2040#define _toupper toupper
2041#endif
2042
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002044string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002046 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002047 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002048 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002049
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002050 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002051 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002053
2054 s = PyString_AS_STRING(newobj);
2055
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002056 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002057
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002059 int c = Py_CHARMASK(s[i]);
2060 if (islower(c))
2061 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002062 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002063
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002064 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002065}
2066
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002067PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002068"S.title() -> string\n\
2069\n\
2070Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002071characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002072
2073static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002074string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002075{
2076 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002077 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002079 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002080
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002081 newobj = PyString_FromStringAndSize(NULL, n);
2082 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002083 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002084 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085 for (i = 0; i < n; i++) {
2086 int c = Py_CHARMASK(*s++);
2087 if (islower(c)) {
2088 if (!previous_is_cased)
2089 c = toupper(c);
2090 previous_is_cased = 1;
2091 } else if (isupper(c)) {
2092 if (previous_is_cased)
2093 c = tolower(c);
2094 previous_is_cased = 1;
2095 } else
2096 previous_is_cased = 0;
2097 *s_new++ = c;
2098 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002099 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002100}
2101
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002102PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103"S.capitalize() -> string\n\
2104\n\
2105Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002106capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107
2108static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002109string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110{
2111 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002112 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002113 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002115 newobj = PyString_FromStringAndSize(NULL, n);
2116 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002118 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119 if (0 < n) {
2120 int c = Py_CHARMASK(*s++);
2121 if (islower(c))
2122 *s_new = toupper(c);
2123 else
2124 *s_new = c;
2125 s_new++;
2126 }
2127 for (i = 1; i < n; i++) {
2128 int c = Py_CHARMASK(*s++);
2129 if (isupper(c))
2130 *s_new = tolower(c);
2131 else
2132 *s_new = c;
2133 s_new++;
2134 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002135 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136}
2137
2138
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002139PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140"S.count(sub[, start[, end]]) -> int\n\
2141\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002142Return the number of non-overlapping occurrences of substring sub in\n\
2143string S[start:end]. Optional arguments start and end are interpreted\n\
2144as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145
2146static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002147string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002149 PyObject *sub_obj;
2150 const char *str = PyString_AS_STRING(self), *sub;
2151 Py_ssize_t sub_len;
2152 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153
Thomas Wouters477c8d52006-05-27 19:21:47 +00002154 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2155 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002157
Thomas Wouters477c8d52006-05-27 19:21:47 +00002158 if (PyString_Check(sub_obj)) {
2159 sub = PyString_AS_STRING(sub_obj);
2160 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002161 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002162 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002163 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002164 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002165 if (count == -1)
2166 return NULL;
2167 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002168 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002169 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002170 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 return NULL;
2172
Thomas Wouters477c8d52006-05-27 19:21:47 +00002173 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002174
Thomas Wouters477c8d52006-05-27 19:21:47 +00002175 return PyInt_FromSsize_t(
2176 stringlib_count(str + start, end - start, sub, sub_len)
2177 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178}
2179
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002180PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181"S.swapcase() -> string\n\
2182\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002183Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002184converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185
2186static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002187string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188{
2189 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002190 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002191 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002193 newobj = PyString_FromStringAndSize(NULL, n);
2194 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002195 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002196 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 for (i = 0; i < n; i++) {
2198 int c = Py_CHARMASK(*s++);
2199 if (islower(c)) {
2200 *s_new = toupper(c);
2201 }
2202 else if (isupper(c)) {
2203 *s_new = tolower(c);
2204 }
2205 else
2206 *s_new = c;
2207 s_new++;
2208 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002209 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210}
2211
2212
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002213PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214"S.translate(table [,deletechars]) -> string\n\
2215\n\
2216Return a copy of the string S, where all characters occurring\n\
2217in the optional argument deletechars are removed, and the\n\
2218remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002219translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002220
2221static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002222string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002224 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002225 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002226 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002228 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002229 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230 PyObject *result;
2231 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002234 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002235 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002237
2238 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002239 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002240 tablen = PyString_GET_SIZE(tableobj);
2241 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002242 else if (tableobj == Py_None) {
2243 table = NULL;
2244 tablen = 256;
2245 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002247 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002248 parameter; instead a mapping to None will cause characters
2249 to be deleted. */
2250 if (delobj != NULL) {
2251 PyErr_SetString(PyExc_TypeError,
2252 "deletions are implemented differently for unicode");
2253 return NULL;
2254 }
2255 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2256 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002257 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259
Martin v. Löwis00b61272002-12-12 20:03:19 +00002260 if (tablen != 256) {
2261 PyErr_SetString(PyExc_ValueError,
2262 "translation table must be 256 characters long");
2263 return NULL;
2264 }
2265
Guido van Rossum4c08d552000-03-10 22:55:18 +00002266 if (delobj != NULL) {
2267 if (PyString_Check(delobj)) {
2268 del_table = PyString_AS_STRING(delobj);
2269 dellen = PyString_GET_SIZE(delobj);
2270 }
2271 else if (PyUnicode_Check(delobj)) {
2272 PyErr_SetString(PyExc_TypeError,
2273 "deletions are implemented differently for unicode");
2274 return NULL;
2275 }
2276 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2277 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278 }
2279 else {
2280 del_table = NULL;
2281 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002282 }
2283
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002284 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002285 result = PyString_FromStringAndSize((char *)NULL, inlen);
2286 if (result == NULL)
2287 return NULL;
2288 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002289 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290
Guido van Rossumd8faa362007-04-27 19:54:29 +00002291 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002292 /* If no deletions are required, use faster code */
2293 for (i = inlen; --i >= 0; ) {
2294 c = Py_CHARMASK(*input++);
2295 if (Py_CHARMASK((*output++ = table[c])) != c)
2296 changed = 1;
2297 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002298 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299 return result;
2300 Py_DECREF(result);
2301 Py_INCREF(input_obj);
2302 return input_obj;
2303 }
2304
Guido van Rossumd8faa362007-04-27 19:54:29 +00002305 if (table == NULL) {
2306 for (i = 0; i < 256; i++)
2307 trans_table[i] = Py_CHARMASK(i);
2308 } else {
2309 for (i = 0; i < 256; i++)
2310 trans_table[i] = Py_CHARMASK(table[i]);
2311 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312
2313 for (i = 0; i < dellen; i++)
2314 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2315
2316 for (i = inlen; --i >= 0; ) {
2317 c = Py_CHARMASK(*input++);
2318 if (trans_table[c] != -1)
2319 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2320 continue;
2321 changed = 1;
2322 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002323 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324 Py_DECREF(result);
2325 Py_INCREF(input_obj);
2326 return input_obj;
2327 }
2328 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002329 if (inlen > 0)
2330 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331 return result;
2332}
2333
2334
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to c within the first
   target_len bytes of target, or NULL if there is none.  Every argument
   is parenthesized so that arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)				\
  ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342
Thomas Wouters477c8d52006-05-27 19:21:47 +00002343/* String ops must return a string. */
2344/* If the object is subclass of string, create a copy */
2345Py_LOCAL(PyStringObject *)
2346return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002348 if (PyString_CheckExact(self)) {
2349 Py_INCREF(self);
2350 return self;
2351 }
2352 return (PyStringObject *)PyString_FromStringAndSize(
2353 PyString_AS_STRING(self),
2354 PyString_GET_SIZE(self));
2355}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356
Thomas Wouters477c8d52006-05-27 19:21:47 +00002357Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002358countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002359{
2360 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002361 const char *start=target;
2362 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002363
Thomas Wouters477c8d52006-05-27 19:21:47 +00002364 while ( (start=findchar(start, end-start, c)) != NULL ) {
2365 count++;
2366 if (count >= maxcount)
2367 break;
2368 start += 1;
2369 }
2370 return count;
2371}
2372
2373Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002374findstring(const char *target, Py_ssize_t target_len,
2375 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002376 Py_ssize_t start,
2377 Py_ssize_t end,
2378 int direction)
2379{
2380 if (start < 0) {
2381 start += target_len;
2382 if (start < 0)
2383 start = 0;
2384 }
2385 if (end > target_len) {
2386 end = target_len;
2387 } else if (end < 0) {
2388 end += target_len;
2389 if (end < 0)
2390 end = 0;
2391 }
2392
2393 /* zero-length substrings always match at the first attempt */
2394 if (pattern_len == 0)
2395 return (direction > 0) ? start : end;
2396
2397 end -= pattern_len;
2398
2399 if (direction < 0) {
2400 for (; end >= start; end--)
2401 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2402 return end;
2403 } else {
2404 for (; start <= end; start++)
2405 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2406 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002407 }
2408 return -1;
2409}
2410
Thomas Wouters477c8d52006-05-27 19:21:47 +00002411Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002412countstring(const char *target, Py_ssize_t target_len,
2413 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002414 Py_ssize_t start,
2415 Py_ssize_t end,
2416 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002417{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002418 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419
Thomas Wouters477c8d52006-05-27 19:21:47 +00002420 if (start < 0) {
2421 start += target_len;
2422 if (start < 0)
2423 start = 0;
2424 }
2425 if (end > target_len) {
2426 end = target_len;
2427 } else if (end < 0) {
2428 end += target_len;
2429 if (end < 0)
2430 end = 0;
2431 }
2432
2433 /* zero-length substrings match everywhere */
2434 if (pattern_len == 0 || maxcount == 0) {
2435 if (target_len+1 < maxcount)
2436 return target_len+1;
2437 return maxcount;
2438 }
2439
2440 end -= pattern_len;
2441 if (direction < 0) {
2442 for (; (end >= start); end--)
2443 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2444 count++;
2445 if (--maxcount <= 0) break;
2446 end -= pattern_len-1;
2447 }
2448 } else {
2449 for (; (start <= end); start++)
2450 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2451 count++;
2452 if (--maxcount <= 0)
2453 break;
2454 start += pattern_len-1;
2455 }
2456 }
2457 return count;
2458}
2459
2460
2461/* Algorithms for different cases of string replacement */
2462
2463/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2464Py_LOCAL(PyStringObject *)
2465replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002466 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002467 Py_ssize_t maxcount)
2468{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002469 char *self_s, *result_s;
2470 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002471 Py_ssize_t count, i, product;
2472 PyStringObject *result;
2473
2474 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002475
Thomas Wouters477c8d52006-05-27 19:21:47 +00002476 /* 1 at the end plus 1 after every character */
2477 count = self_len+1;
2478 if (maxcount < count)
2479 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002480
Thomas Wouters477c8d52006-05-27 19:21:47 +00002481 /* Check for overflow */
2482 /* result_len = count * to_len + self_len; */
2483 product = count * to_len;
2484 if (product / to_len != count) {
2485 PyErr_SetString(PyExc_OverflowError,
2486 "replace string is too long");
2487 return NULL;
2488 }
2489 result_len = product + self_len;
2490 if (result_len < 0) {
2491 PyErr_SetString(PyExc_OverflowError,
2492 "replace string is too long");
2493 return NULL;
2494 }
2495
2496 if (! (result = (PyStringObject *)
2497 PyString_FromStringAndSize(NULL, result_len)) )
2498 return NULL;
2499
2500 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002501 result_s = PyString_AS_STRING(result);
2502
2503 /* TODO: special case single character, which doesn't need memcpy */
2504
2505 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002506 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002507 result_s += to_len;
2508 count -= 1;
2509
2510 for (i=0; i<count; i++) {
2511 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002512 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002513 result_s += to_len;
2514 }
2515
2516 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002517 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002518
2519 return result;
2520}
2521
2522/* Special case for deleting a single character */
2523/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2524Py_LOCAL(PyStringObject *)
2525replace_delete_single_character(PyStringObject *self,
2526 char from_c, Py_ssize_t maxcount)
2527{
2528 char *self_s, *result_s;
2529 char *start, *next, *end;
2530 Py_ssize_t self_len, result_len;
2531 Py_ssize_t count;
2532 PyStringObject *result;
2533
2534 self_len = PyString_GET_SIZE(self);
2535 self_s = PyString_AS_STRING(self);
2536
2537 count = countchar(self_s, self_len, from_c, maxcount);
2538 if (count == 0) {
2539 return return_self(self);
2540 }
2541
2542 result_len = self_len - count; /* from_len == 1 */
2543 assert(result_len>=0);
2544
2545 if ( (result = (PyStringObject *)
2546 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2547 return NULL;
2548 result_s = PyString_AS_STRING(result);
2549
2550 start = self_s;
2551 end = self_s + self_len;
2552 while (count-- > 0) {
2553 next = findchar(start, end-start, from_c);
2554 if (next == NULL)
2555 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002556 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002557 result_s += (next-start);
2558 start = next+1;
2559 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002560 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002561
Thomas Wouters477c8d52006-05-27 19:21:47 +00002562 return result;
2563}
2564
2565/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2566
2567Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002568replace_delete_substring(PyStringObject *self,
2569 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002570 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002571 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002572 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002573 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002574 Py_ssize_t count, offset;
2575 PyStringObject *result;
2576
2577 self_len = PyString_GET_SIZE(self);
2578 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002579
2580 count = countstring(self_s, self_len,
2581 from_s, from_len,
2582 0, self_len, 1,
2583 maxcount);
2584
2585 if (count == 0) {
2586 /* no matches */
2587 return return_self(self);
2588 }
2589
2590 result_len = self_len - (count * from_len);
2591 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002592
Thomas Wouters477c8d52006-05-27 19:21:47 +00002593 if ( (result = (PyStringObject *)
2594 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2595 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002596
Thomas Wouters477c8d52006-05-27 19:21:47 +00002597 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002598
Thomas Wouters477c8d52006-05-27 19:21:47 +00002599 start = self_s;
2600 end = self_s + self_len;
2601 while (count-- > 0) {
2602 offset = findstring(start, end-start,
2603 from_s, from_len,
2604 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002605 if (offset == -1)
2606 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002607 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002608
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002609 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002610
Thomas Wouters477c8d52006-05-27 19:21:47 +00002611 result_s += (next-start);
2612 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002613 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002614 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002615 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002616}
2617
Thomas Wouters477c8d52006-05-27 19:21:47 +00002618/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2619Py_LOCAL(PyStringObject *)
2620replace_single_character_in_place(PyStringObject *self,
2621 char from_c, char to_c,
2622 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002623{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002624 char *self_s, *result_s, *start, *end, *next;
2625 Py_ssize_t self_len;
2626 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002627
Thomas Wouters477c8d52006-05-27 19:21:47 +00002628 /* The result string will be the same size */
2629 self_s = PyString_AS_STRING(self);
2630 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002631
Thomas Wouters477c8d52006-05-27 19:21:47 +00002632 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002633
Thomas Wouters477c8d52006-05-27 19:21:47 +00002634 if (next == NULL) {
2635 /* No matches; return the original string */
2636 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002637 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002638
Thomas Wouters477c8d52006-05-27 19:21:47 +00002639 /* Need to make a new string */
2640 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2641 if (result == NULL)
2642 return NULL;
2643 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002644 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002645
Thomas Wouters477c8d52006-05-27 19:21:47 +00002646 /* change everything in-place, starting with this one */
2647 start = result_s + (next-self_s);
2648 *start = to_c;
2649 start++;
2650 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002651
Thomas Wouters477c8d52006-05-27 19:21:47 +00002652 while (--maxcount > 0) {
2653 next = findchar(start, end-start, from_c);
2654 if (next == NULL)
2655 break;
2656 *next = to_c;
2657 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002658 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002659
Thomas Wouters477c8d52006-05-27 19:21:47 +00002660 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002661}
2662
Thomas Wouters477c8d52006-05-27 19:21:47 +00002663/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2664Py_LOCAL(PyStringObject *)
2665replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002666 const char *from_s, Py_ssize_t from_len,
2667 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002668 Py_ssize_t maxcount)
2669{
2670 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002671 char *self_s;
2672 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002673 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002674
Thomas Wouters477c8d52006-05-27 19:21:47 +00002675 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002676
Thomas Wouters477c8d52006-05-27 19:21:47 +00002677 self_s = PyString_AS_STRING(self);
2678 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002679
Thomas Wouters477c8d52006-05-27 19:21:47 +00002680 offset = findstring(self_s, self_len,
2681 from_s, from_len,
2682 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002683 if (offset == -1) {
2684 /* No matches; return the original string */
2685 return return_self(self);
2686 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002687
Thomas Wouters477c8d52006-05-27 19:21:47 +00002688 /* Need to make a new string */
2689 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2690 if (result == NULL)
2691 return NULL;
2692 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002693 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002694
Thomas Wouters477c8d52006-05-27 19:21:47 +00002695 /* change everything in-place, starting with this one */
2696 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002697 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002698 start += from_len;
2699 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002700
Thomas Wouters477c8d52006-05-27 19:21:47 +00002701 while ( --maxcount > 0) {
2702 offset = findstring(start, end-start,
2703 from_s, from_len,
2704 0, end-start, FORWARD);
2705 if (offset==-1)
2706 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002707 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002708 start += offset+from_len;
2709 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002710
Thomas Wouters477c8d52006-05-27 19:21:47 +00002711 return result;
2712}
2713
2714/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2715Py_LOCAL(PyStringObject *)
2716replace_single_character(PyStringObject *self,
2717 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002718 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002719 Py_ssize_t maxcount)
2720{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002721 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002722 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002723 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002724 Py_ssize_t count, product;
2725 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002726
Thomas Wouters477c8d52006-05-27 19:21:47 +00002727 self_s = PyString_AS_STRING(self);
2728 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002729
Thomas Wouters477c8d52006-05-27 19:21:47 +00002730 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002731 if (count == 0) {
2732 /* no matches, return unchanged */
2733 return return_self(self);
2734 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002735
Thomas Wouters477c8d52006-05-27 19:21:47 +00002736 /* use the difference between current and new, hence the "-1" */
2737 /* result_len = self_len + count * (to_len-1) */
2738 product = count * (to_len-1);
2739 if (product / (to_len-1) != count) {
2740 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2741 return NULL;
2742 }
2743 result_len = self_len + product;
2744 if (result_len < 0) {
2745 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2746 return NULL;
2747 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002748
Thomas Wouters477c8d52006-05-27 19:21:47 +00002749 if ( (result = (PyStringObject *)
2750 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2751 return NULL;
2752 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002753
Thomas Wouters477c8d52006-05-27 19:21:47 +00002754 start = self_s;
2755 end = self_s + self_len;
2756 while (count-- > 0) {
2757 next = findchar(start, end-start, from_c);
2758 if (next == NULL)
2759 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002760
Thomas Wouters477c8d52006-05-27 19:21:47 +00002761 if (next == start) {
2762 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002763 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002764 result_s += to_len;
2765 start += 1;
2766 } else {
2767 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002768 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002769 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002770 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002771 result_s += to_len;
2772 start = next+1;
2773 }
2774 }
2775 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002776 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002777
Thomas Wouters477c8d52006-05-27 19:21:47 +00002778 return result;
2779}
2780
2781/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2782Py_LOCAL(PyStringObject *)
2783replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002784 const char *from_s, Py_ssize_t from_len,
2785 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002786 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002787 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002788 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002789 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002790 Py_ssize_t count, offset, product;
2791 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002792
Thomas Wouters477c8d52006-05-27 19:21:47 +00002793 self_s = PyString_AS_STRING(self);
2794 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002795
Thomas Wouters477c8d52006-05-27 19:21:47 +00002796 count = countstring(self_s, self_len,
2797 from_s, from_len,
2798 0, self_len, FORWARD, maxcount);
2799 if (count == 0) {
2800 /* no matches, return unchanged */
2801 return return_self(self);
2802 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002803
Thomas Wouters477c8d52006-05-27 19:21:47 +00002804 /* Check for overflow */
2805 /* result_len = self_len + count * (to_len-from_len) */
2806 product = count * (to_len-from_len);
2807 if (product / (to_len-from_len) != count) {
2808 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2809 return NULL;
2810 }
2811 result_len = self_len + product;
2812 if (result_len < 0) {
2813 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2814 return NULL;
2815 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002816
Thomas Wouters477c8d52006-05-27 19:21:47 +00002817 if ( (result = (PyStringObject *)
2818 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2819 return NULL;
2820 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002821
Thomas Wouters477c8d52006-05-27 19:21:47 +00002822 start = self_s;
2823 end = self_s + self_len;
2824 while (count-- > 0) {
2825 offset = findstring(start, end-start,
2826 from_s, from_len,
2827 0, end-start, FORWARD);
2828 if (offset == -1)
2829 break;
2830 next = start+offset;
2831 if (next == start) {
2832 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002833 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002834 result_s += to_len;
2835 start += from_len;
2836 } else {
2837 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002838 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002839 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002840 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002841 result_s += to_len;
2842 start = next+from_len;
2843 }
2844 }
2845 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002846 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002847
Thomas Wouters477c8d52006-05-27 19:21:47 +00002848 return result;
2849}
2850
2851
2852Py_LOCAL(PyStringObject *)
2853replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002854 const char *from_s, Py_ssize_t from_len,
2855 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002856 Py_ssize_t maxcount)
2857{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002858 if (maxcount < 0) {
2859 maxcount = PY_SSIZE_T_MAX;
2860 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2861 /* nothing to do; return the original string */
2862 return return_self(self);
2863 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002864
Thomas Wouters477c8d52006-05-27 19:21:47 +00002865 if (maxcount == 0 ||
2866 (from_len == 0 && to_len == 0)) {
2867 /* nothing to do; return the original string */
2868 return return_self(self);
2869 }
2870
2871 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002872
Thomas Wouters477c8d52006-05-27 19:21:47 +00002873 if (from_len == 0) {
2874 /* insert the 'to' string everywhere. */
2875 /* >>> "Python".replace("", ".") */
2876 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002877 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002878 }
2879
2880 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2881 /* point for an empty self string to generate a non-empty string */
2882 /* Special case so the remaining code always gets a non-empty string */
2883 if (PyString_GET_SIZE(self) == 0) {
2884 return return_self(self);
2885 }
2886
2887 if (to_len == 0) {
2888 /* delete all occurances of 'from' string */
2889 if (from_len == 1) {
2890 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002891 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002892 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002893 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002894 }
2895 }
2896
2897 /* Handle special case where both strings have the same length */
2898
2899 if (from_len == to_len) {
2900 if (from_len == 1) {
2901 return replace_single_character_in_place(
2902 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002903 from_s[0],
2904 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002905 maxcount);
2906 } else {
2907 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002908 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002909 }
2910 }
2911
2912 /* Otherwise use the more generic algorithms */
2913 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002914 return replace_single_character(self, from_s[0],
2915 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002916 } else {
2917 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002918 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002919 }
2920}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002921
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002922PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002923"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002924\n\
2925Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002926old replaced by new. If the optional argument count is\n\
2927given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002928
2929static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002930string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002931{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002932 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002933 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002934 const char *from_s, *to_s;
2935 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002936
Thomas Wouters477c8d52006-05-27 19:21:47 +00002937 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002938 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002939
Thomas Wouters477c8d52006-05-27 19:21:47 +00002940 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002941 from_s = PyString_AS_STRING(from);
2942 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002943 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002944 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002945 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002946 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002947 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002948 return NULL;
2949
Thomas Wouters477c8d52006-05-27 19:21:47 +00002950 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002951 to_s = PyString_AS_STRING(to);
2952 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002953 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002954 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002955 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002956 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002957 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002958 return NULL;
2959
Thomas Wouters477c8d52006-05-27 19:21:47 +00002960 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002961 from_s, from_len,
2962 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002963}
2964
Thomas Wouters477c8d52006-05-27 19:21:47 +00002965/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002966
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002967/* Matches the end (direction >= 0) or start (direction < 0) of self
2968 * against substr, using the start and end arguments. Returns
2969 * -1 on error, 0 if not found and 1 if found.
2970 */
2971Py_LOCAL(int)
2972_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2973 Py_ssize_t end, int direction)
2974{
2975 Py_ssize_t len = PyString_GET_SIZE(self);
2976 Py_ssize_t slen;
2977 const char* sub;
2978 const char* str;
2979
2980 if (PyString_Check(substr)) {
2981 sub = PyString_AS_STRING(substr);
2982 slen = PyString_GET_SIZE(substr);
2983 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002984 else if (PyUnicode_Check(substr))
2985 return PyUnicode_Tailmatch((PyObject *)self,
2986 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002987 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2988 return -1;
2989 str = PyString_AS_STRING(self);
2990
2991 string_adjust_indices(&start, &end, len);
2992
2993 if (direction < 0) {
2994 /* startswith */
2995 if (start+slen > len)
2996 return 0;
2997 } else {
2998 /* endswith */
2999 if (end-start < slen || start > len)
3000 return 0;
3001
3002 if (end-slen > start)
3003 start = end - slen;
3004 }
3005 if (end-start >= slen)
3006 return ! memcmp(str+start, sub, slen);
3007 return 0;
3008}
3009
3010
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003011PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003012"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003013\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003014Return True if S starts with the specified prefix, False otherwise.\n\
3015With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003016With optional end, stop comparing S at that position.\n\
3017prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003018
3019static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003020string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003021{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003022 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003023 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003024 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003025 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003026
Guido van Rossumc6821402000-05-08 14:08:05 +00003027 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3028 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003029 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003030 if (PyTuple_Check(subobj)) {
3031 Py_ssize_t i;
3032 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3033 result = _string_tailmatch(self,
3034 PyTuple_GET_ITEM(subobj, i),
3035 start, end, -1);
3036 if (result == -1)
3037 return NULL;
3038 else if (result) {
3039 Py_RETURN_TRUE;
3040 }
3041 }
3042 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003043 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003044 result = _string_tailmatch(self, subobj, start, end, -1);
3045 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003046 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003047 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003048 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003049}
3050
3051
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003052PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003053"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003055Return True if S ends with the specified suffix, False otherwise.\n\
3056With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003057With optional end, stop comparing S at that position.\n\
3058suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003059
3060static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003061string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003062{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003063 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003064 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003065 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003066 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003067
Guido van Rossumc6821402000-05-08 14:08:05 +00003068 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3069 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003071 if (PyTuple_Check(subobj)) {
3072 Py_ssize_t i;
3073 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3074 result = _string_tailmatch(self,
3075 PyTuple_GET_ITEM(subobj, i),
3076 start, end, +1);
3077 if (result == -1)
3078 return NULL;
3079 else if (result) {
3080 Py_RETURN_TRUE;
3081 }
3082 }
3083 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003084 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003085 result = _string_tailmatch(self, subobj, start, end, +1);
3086 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003087 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003088 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003089 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003090}
3091
3092
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003093PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003094"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003095\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003096Encodes S using the codec registered for encoding. encoding defaults\n\
3097to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003098handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003099a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3100'xmlcharrefreplace' as well as any other name registered with\n\
3101codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003102
3103static PyObject *
3104string_encode(PyStringObject *self, PyObject *args)
3105{
3106 char *encoding = NULL;
3107 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003108 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003109
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003110 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3111 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003112 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003113 if (v == NULL)
3114 goto onError;
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003115 if (!PyBytes_Check(v)) {
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003116 PyErr_Format(PyExc_TypeError,
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003117 "[str8] encoder did not return a bytes object "
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003118 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003119 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003120 Py_DECREF(v);
3121 return NULL;
3122 }
3123 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003124
3125 onError:
3126 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003127}
3128
3129
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003130PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003131"S.decode([encoding[,errors]]) -> object\n\
3132\n\
3133Decodes S using the codec registered for encoding. encoding defaults\n\
3134to the default encoding. errors may be given to set a different error\n\
3135handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003136a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3137as well as any other name registerd with codecs.register_error that is\n\
3138able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003139
3140static PyObject *
3141string_decode(PyStringObject *self, PyObject *args)
3142{
3143 char *encoding = NULL;
3144 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003145 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003146
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003147 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3148 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003149 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003150 if (v == NULL)
3151 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003152 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3153 PyErr_Format(PyExc_TypeError,
3154 "decoder did not return a string/unicode object "
3155 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003156 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003157 Py_DECREF(v);
3158 return NULL;
3159 }
3160 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003161
3162 onError:
3163 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003164}
3165
3166
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003167PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003168"S.expandtabs([tabsize]) -> string\n\
3169\n\
3170Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003171If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003172
3173static PyObject*
3174string_expandtabs(PyStringObject *self, PyObject *args)
3175{
3176 const char *e, *p;
3177 char *q;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003178 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003179 PyObject *u;
3180 int tabsize = 8;
3181
3182 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3183 return NULL;
3184
Thomas Wouters7e474022000-07-16 12:04:32 +00003185 /* First pass: determine size of output string */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003186 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003187 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3188 for (p = PyString_AS_STRING(self); p < e; p++)
3189 if (*p == '\t') {
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003190 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191 j += tabsize - (j % tabsize);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003192 if (old_j > j) {
3193 PyErr_SetString(PyExc_OverflowError,
3194 "new string is too long");
3195 return NULL;
3196 }
3197 old_j = j;
3198 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003199 }
3200 else {
3201 j++;
3202 if (*p == '\n' || *p == '\r') {
3203 i += j;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003204 old_j = j = 0;
3205 if (i < 0) {
3206 PyErr_SetString(PyExc_OverflowError,
3207 "new string is too long");
3208 return NULL;
3209 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003210 }
3211 }
3212
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003213 if ((i + j) < 0) {
3214 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3215 return NULL;
3216 }
3217
Guido van Rossum4c08d552000-03-10 22:55:18 +00003218 /* Second pass: create output string and fill it */
3219 u = PyString_FromStringAndSize(NULL, i + j);
3220 if (!u)
3221 return NULL;
3222
3223 j = 0;
3224 q = PyString_AS_STRING(u);
3225
3226 for (p = PyString_AS_STRING(self); p < e; p++)
3227 if (*p == '\t') {
3228 if (tabsize > 0) {
3229 i = tabsize - (j % tabsize);
3230 j += i;
3231 while (i--)
3232 *q++ = ' ';
3233 }
3234 }
3235 else {
3236 j++;
3237 *q++ = *p;
3238 if (*p == '\n' || *p == '\r')
3239 j = 0;
3240 }
3241
3242 return u;
3243}
3244
Thomas Wouters477c8d52006-05-27 19:21:47 +00003245Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003246pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003247{
3248 PyObject *u;
3249
3250 if (left < 0)
3251 left = 0;
3252 if (right < 0)
3253 right = 0;
3254
Tim Peters8fa5dd02001-09-12 02:18:30 +00003255 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003256 Py_INCREF(self);
3257 return (PyObject *)self;
3258 }
3259
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003260 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003261 left + PyString_GET_SIZE(self) + right);
3262 if (u) {
3263 if (left)
3264 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003265 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003266 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003267 PyString_GET_SIZE(self));
3268 if (right)
3269 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3270 fill, right);
3271 }
3272
3273 return u;
3274}
3275
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003276PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003277"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003278"\n"
3279"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003280"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003281
3282static PyObject *
3283string_ljust(PyStringObject *self, PyObject *args)
3284{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003285 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003286 char fillchar = ' ';
3287
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003288 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003289 return NULL;
3290
Tim Peters8fa5dd02001-09-12 02:18:30 +00003291 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003292 Py_INCREF(self);
3293 return (PyObject*) self;
3294 }
3295
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003296 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003297}
3298
3299
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003300PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003301"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003302"\n"
3303"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003304"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003305
3306static PyObject *
3307string_rjust(PyStringObject *self, PyObject *args)
3308{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003309 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003310 char fillchar = ' ';
3311
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003312 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003313 return NULL;
3314
Tim Peters8fa5dd02001-09-12 02:18:30 +00003315 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003316 Py_INCREF(self);
3317 return (PyObject*) self;
3318 }
3319
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003320 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003321}
3322
3323
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003324PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003325"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003326"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003327"Return S centered in a string of length width. Padding is\n"
3328"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003329
3330static PyObject *
3331string_center(PyStringObject *self, PyObject *args)
3332{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003333 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003334 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003335 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003336
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003337 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003338 return NULL;
3339
Tim Peters8fa5dd02001-09-12 02:18:30 +00003340 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003341 Py_INCREF(self);
3342 return (PyObject*) self;
3343 }
3344
3345 marg = width - PyString_GET_SIZE(self);
3346 left = marg / 2 + (marg & width & 1);
3347
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003348 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003349}
3350
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003351PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003352"S.zfill(width) -> string\n"
3353"\n"
3354"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003355"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003356
3357static PyObject *
3358string_zfill(PyStringObject *self, PyObject *args)
3359{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003360 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003361 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003362 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003363 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003364
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003365 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003366 return NULL;
3367
3368 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003369 if (PyString_CheckExact(self)) {
3370 Py_INCREF(self);
3371 return (PyObject*) self;
3372 }
3373 else
3374 return PyString_FromStringAndSize(
3375 PyString_AS_STRING(self),
3376 PyString_GET_SIZE(self)
3377 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003378 }
3379
3380 fill = width - PyString_GET_SIZE(self);
3381
3382 s = pad(self, fill, 0, '0');
3383
3384 if (s == NULL)
3385 return NULL;
3386
3387 p = PyString_AS_STRING(s);
3388 if (p[fill] == '+' || p[fill] == '-') {
3389 /* move sign to beginning of string */
3390 p[0] = p[fill];
3391 p[fill] = '0';
3392 }
3393
3394 return (PyObject*) s;
3395}
3396
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003397PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003398"S.isspace() -> bool\n\
3399\n\
3400Return True if all characters in S are whitespace\n\
3401and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003402
3403static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003404string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003405{
Fred Drakeba096332000-07-09 07:04:36 +00003406 register const unsigned char *p
3407 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003408 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003409
Guido van Rossum4c08d552000-03-10 22:55:18 +00003410 /* Shortcut for single character strings */
3411 if (PyString_GET_SIZE(self) == 1 &&
3412 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003413 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003415 /* Special case for empty strings */
3416 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003417 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003418
Guido van Rossum4c08d552000-03-10 22:55:18 +00003419 e = p + PyString_GET_SIZE(self);
3420 for (; p < e; p++) {
3421 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003422 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003423 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003424 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003425}
3426
3427
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003428PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003429"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003430\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003431Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003432and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003433
3434static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003435string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003436{
Fred Drakeba096332000-07-09 07:04:36 +00003437 register const unsigned char *p
3438 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003439 register const unsigned char *e;
3440
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003441 /* Shortcut for single character strings */
3442 if (PyString_GET_SIZE(self) == 1 &&
3443 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003444 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003445
3446 /* Special case for empty strings */
3447 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003448 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003449
3450 e = p + PyString_GET_SIZE(self);
3451 for (; p < e; p++) {
3452 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003453 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003454 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003455 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003456}
3457
3458
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003459PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003460"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003461\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003462Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003463and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003464
3465static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003466string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003467{
Fred Drakeba096332000-07-09 07:04:36 +00003468 register const unsigned char *p
3469 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003470 register const unsigned char *e;
3471
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003472 /* Shortcut for single character strings */
3473 if (PyString_GET_SIZE(self) == 1 &&
3474 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003475 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003476
3477 /* Special case for empty strings */
3478 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003479 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003480
3481 e = p + PyString_GET_SIZE(self);
3482 for (; p < e; p++) {
3483 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003484 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003485 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003486 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003487}
3488
3489
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003490PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003491"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003492\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003493Return True if all characters in S are digits\n\
3494and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003495
3496static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003497string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003498{
Fred Drakeba096332000-07-09 07:04:36 +00003499 register const unsigned char *p
3500 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003501 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003502
Guido van Rossum4c08d552000-03-10 22:55:18 +00003503 /* Shortcut for single character strings */
3504 if (PyString_GET_SIZE(self) == 1 &&
3505 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003506 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003507
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003508 /* Special case for empty strings */
3509 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003510 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003511
Guido van Rossum4c08d552000-03-10 22:55:18 +00003512 e = p + PyString_GET_SIZE(self);
3513 for (; p < e; p++) {
3514 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003515 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003516 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003517 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518}
3519
3520
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003521PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003522"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003523\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003524Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003525at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003526
3527static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003528string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529{
Fred Drakeba096332000-07-09 07:04:36 +00003530 register const unsigned char *p
3531 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003532 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003533 int cased;
3534
Guido van Rossum4c08d552000-03-10 22:55:18 +00003535 /* Shortcut for single character strings */
3536 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003537 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003538
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003539 /* Special case for empty strings */
3540 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003541 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003542
Guido van Rossum4c08d552000-03-10 22:55:18 +00003543 e = p + PyString_GET_SIZE(self);
3544 cased = 0;
3545 for (; p < e; p++) {
3546 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003547 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003548 else if (!cased && islower(*p))
3549 cased = 1;
3550 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003551 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003552}
3553
3554
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003555PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003556"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003557\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003558Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003559at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003560
3561static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003562string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003563{
Fred Drakeba096332000-07-09 07:04:36 +00003564 register const unsigned char *p
3565 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003566 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003567 int cased;
3568
Guido van Rossum4c08d552000-03-10 22:55:18 +00003569 /* Shortcut for single character strings */
3570 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003571 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003572
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003573 /* Special case for empty strings */
3574 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003575 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003576
Guido van Rossum4c08d552000-03-10 22:55:18 +00003577 e = p + PyString_GET_SIZE(self);
3578 cased = 0;
3579 for (; p < e; p++) {
3580 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003582 else if (!cased && isupper(*p))
3583 cased = 1;
3584 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003585 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003586}
3587
3588
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003589PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003590"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003591\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003592Return True if S is a titlecased string and there is at least one\n\
3593character in S, i.e. uppercase characters may only follow uncased\n\
3594characters and lowercase characters only cased ones. Return False\n\
3595otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003596
3597static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003598string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003599{
Fred Drakeba096332000-07-09 07:04:36 +00003600 register const unsigned char *p
3601 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003602 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003603 int cased, previous_is_cased;
3604
Guido van Rossum4c08d552000-03-10 22:55:18 +00003605 /* Shortcut for single character strings */
3606 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003607 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003608
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003609 /* Special case for empty strings */
3610 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003612
Guido van Rossum4c08d552000-03-10 22:55:18 +00003613 e = p + PyString_GET_SIZE(self);
3614 cased = 0;
3615 previous_is_cased = 0;
3616 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003617 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003618
3619 if (isupper(ch)) {
3620 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003621 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622 previous_is_cased = 1;
3623 cased = 1;
3624 }
3625 else if (islower(ch)) {
3626 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003627 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003628 previous_is_cased = 1;
3629 cased = 1;
3630 }
3631 else
3632 previous_is_cased = 0;
3633 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003634 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635}
3636
3637
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003638PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003639"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003640\n\
3641Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003642Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003643is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644
Guido van Rossum4c08d552000-03-10 22:55:18 +00003645static PyObject*
3646string_splitlines(PyStringObject *self, PyObject *args)
3647{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003648 register Py_ssize_t i;
3649 register Py_ssize_t j;
3650 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003651 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003652 PyObject *list;
3653 PyObject *str;
3654 char *data;
3655
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003656 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657 return NULL;
3658
3659 data = PyString_AS_STRING(self);
3660 len = PyString_GET_SIZE(self);
3661
Thomas Wouters477c8d52006-05-27 19:21:47 +00003662 /* This does not use the preallocated list because splitlines is
3663 usually run with hundreds of newlines. The overhead of
3664 switching between PyList_SET_ITEM and append causes about a
3665 2-3% slowdown for that common case. A smarter implementation
3666 could move the if check out, so the SET_ITEMs are done first
3667 and the appends only done when the prealloc buffer is full.
3668 That's too much work for little gain.*/
3669
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670 list = PyList_New(0);
3671 if (!list)
3672 goto onError;
3673
3674 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003675 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003676
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677 /* Find a line and append it */
3678 while (i < len && data[i] != '\n' && data[i] != '\r')
3679 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680
3681 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003682 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683 if (i < len) {
3684 if (data[i] == '\r' && i + 1 < len &&
3685 data[i+1] == '\n')
3686 i += 2;
3687 else
3688 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003689 if (keepends)
3690 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003692 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003693 j = i;
3694 }
3695 if (j < len) {
3696 SPLIT_APPEND(data, j, len);
3697 }
3698
3699 return list;
3700
3701 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003702 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703 return NULL;
3704}
3705
3706#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003707#undef SPLIT_ADD
3708#undef MAX_PREALLOC
3709#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003711static PyObject *
3712string_getnewargs(PyStringObject *v)
3713{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003714 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003715}
3716
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003717
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003718static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003719string_methods[] = {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003720 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3721 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003722 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003723 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3724 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003725 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3726 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3727 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3728 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3729 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3730 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3731 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003732 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3733 capitalize__doc__},
3734 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3735 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3736 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003737 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003738 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3739 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3740 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3741 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3742 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3743 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3744 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003745 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3746 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003747 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3748 startswith__doc__},
3749 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3750 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3751 swapcase__doc__},
3752 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3753 translate__doc__},
3754 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3755 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3756 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3757 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3758 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3759 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3760 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3761 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3762 expandtabs__doc__},
3763 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3764 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003765 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003766 {NULL, NULL} /* sentinel */
3767};
3768
Jeremy Hylton938ace62002-07-17 16:30:39 +00003769static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003770str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3771
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003772static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003773string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003774{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003775 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003776 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003777
Guido van Rossumae960af2001-08-30 03:11:59 +00003778 if (type != &PyString_Type)
3779 return str_subtype_new(type, args, kwds);
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003780 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
Tim Peters6d6c1a32001-08-02 04:15:00 +00003781 return NULL;
3782 if (x == NULL)
3783 return PyString_FromString("");
3784 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003785}
3786
Guido van Rossumae960af2001-08-30 03:11:59 +00003787static PyObject *
3788str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3789{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003790 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003791 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003792
3793 assert(PyType_IsSubtype(type, &PyString_Type));
3794 tmp = string_new(&PyString_Type, args, kwds);
3795 if (tmp == NULL)
3796 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003797 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003798 n = PyString_GET_SIZE(tmp);
3799 pnew = type->tp_alloc(type, n);
3800 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003801 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003802 ((PyStringObject *)pnew)->ob_shash =
3803 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003804 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003805 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003806 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003807 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003808}
3809
Guido van Rossumcacfc072002-05-24 19:01:59 +00003810static PyObject *
3811basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3812{
3813 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003814 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003815 return NULL;
3816}
3817
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003818static PyObject *
3819string_mod(PyObject *v, PyObject *w)
3820{
3821 if (!PyString_Check(v)) {
3822 Py_INCREF(Py_NotImplemented);
3823 return Py_NotImplemented;
3824 }
3825 return PyString_Format(v, w);
3826}
3827
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003828PyDoc_STRVAR(basestring_doc,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003829"Type basestring cannot be instantiated; it is the base for str8 and str.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003830
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003831static PyNumberMethods string_as_number = {
3832 0, /*nb_add*/
3833 0, /*nb_subtract*/
3834 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003835 string_mod, /*nb_remainder*/
3836};
3837
3838
Guido van Rossumcacfc072002-05-24 19:01:59 +00003839PyTypeObject PyBaseString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003840 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003841 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003842 0,
3843 0,
3844 0, /* tp_dealloc */
3845 0, /* tp_print */
3846 0, /* tp_getattr */
3847 0, /* tp_setattr */
3848 0, /* tp_compare */
3849 0, /* tp_repr */
3850 0, /* tp_as_number */
3851 0, /* tp_as_sequence */
3852 0, /* tp_as_mapping */
3853 0, /* tp_hash */
3854 0, /* tp_call */
3855 0, /* tp_str */
3856 0, /* tp_getattro */
3857 0, /* tp_setattro */
3858 0, /* tp_as_buffer */
3859 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3860 basestring_doc, /* tp_doc */
3861 0, /* tp_traverse */
3862 0, /* tp_clear */
3863 0, /* tp_richcompare */
3864 0, /* tp_weaklistoffset */
3865 0, /* tp_iter */
3866 0, /* tp_iternext */
3867 0, /* tp_methods */
3868 0, /* tp_members */
3869 0, /* tp_getset */
3870 &PyBaseObject_Type, /* tp_base */
3871 0, /* tp_dict */
3872 0, /* tp_descr_get */
3873 0, /* tp_descr_set */
3874 0, /* tp_dictoffset */
3875 0, /* tp_init */
3876 0, /* tp_alloc */
3877 basestring_new, /* tp_new */
3878 0, /* tp_free */
3879};
3880
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003881PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003882"str(object) -> string\n\
3883\n\
3884Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003885If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003886
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003887static PyObject *str_iter(PyObject *seq);
3888
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003889PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003890 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Walter Dörwald5d7a7002007-05-03 20:49:27 +00003891 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003892 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003893 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003894 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003895 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003896 0, /* tp_getattr */
3897 0, /* tp_setattr */
3898 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003899 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003900 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003901 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003902 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003903 (hashfunc)string_hash, /* tp_hash */
3904 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003905 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003906 PyObject_GenericGetAttr, /* tp_getattro */
3907 0, /* tp_setattro */
3908 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003909 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3910 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003911 string_doc, /* tp_doc */
3912 0, /* tp_traverse */
3913 0, /* tp_clear */
3914 (richcmpfunc)string_richcompare, /* tp_richcompare */
3915 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003916 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003917 0, /* tp_iternext */
3918 string_methods, /* tp_methods */
3919 0, /* tp_members */
3920 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003921 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003922 0, /* tp_dict */
3923 0, /* tp_descr_get */
3924 0, /* tp_descr_set */
3925 0, /* tp_dictoffset */
3926 0, /* tp_init */
3927 0, /* tp_alloc */
3928 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003929 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003930};
3931
3932void
Fred Drakeba096332000-07-09 07:04:36 +00003933PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003934{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003935 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003936 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003937 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003938 if (w == NULL || !PyString_Check(*pv)) {
3939 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003940 *pv = NULL;
3941 return;
3942 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003943 v = string_concat((PyStringObject *) *pv, w);
3944 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003945 *pv = v;
3946}
3947
Guido van Rossum013142a1994-08-30 08:19:36 +00003948void
Fred Drakeba096332000-07-09 07:04:36 +00003949PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003950{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003951 PyString_Concat(pv, w);
3952 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003953}
3954
3955
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003956/* The following function breaks the notion that strings are immutable:
3957 it changes the size of a string. We get away with this only if there
3958 is only one module referencing the object. You can also think of it
3959 as creating a new string object and destroying the old one, only
3960 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003961 already be known to some other part of the code...
3962 Note that if there's not enough memory to resize the string, the original
3963 string object at *pv is deallocated, *pv is set to NULL, an "out of
3964 memory" exception is set, and -1 is returned. Else (on success) 0 is
3965 returned, and the value in *pv may or may not be the same as on input.
3966 As always, an extra byte is allocated for a trailing \0 byte (newsize
3967 does *not* include that), and a trailing \0 byte is stored.
3968*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003969
3970int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003971_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003972{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003973 register PyObject *v;
3974 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003975 v = *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003976 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00003977 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003978 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003979 Py_DECREF(v);
3980 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003981 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003982 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003983 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003984 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003985 _Py_ForgetReference(v);
3986 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003987 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003988 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003989 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003990 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003991 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003992 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003993 _Py_NewReference(*pv);
3994 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003995 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003996 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003997 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003998 return 0;
3999}
Guido van Rossume5372401993-03-16 12:15:04 +00004000
4001/* Helpers for formatstring */
4002
Thomas Wouters477c8d52006-05-27 19:21:47 +00004003Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004004getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004005{
Thomas Wouters977485d2006-02-16 15:59:12 +00004006 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004007 if (argidx < arglen) {
4008 (*p_argidx)++;
4009 if (arglen < 0)
4010 return args;
4011 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004012 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004013 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004014 PyErr_SetString(PyExc_TypeError,
4015 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004016 return NULL;
4017}
4018
/* Format flag bits; each corresponds to one flag character of a
   format specification. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
4031
Thomas Wouters477c8d52006-05-27 19:21:47 +00004032Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004033formatfloat(char *buf, size_t buflen, int flags,
4034 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004035{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004036 /* fmt = '%#.' + `prec` + `type`
4037 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004038 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004039 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004040 x = PyFloat_AsDouble(v);
4041 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004042 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004043 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004044 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004045 }
Guido van Rossume5372401993-03-16 12:15:04 +00004046 if (prec < 0)
4047 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004048 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4049 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004050 /* Worst case length calc to ensure no buffer overrun:
4051
4052 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004053 fmt = %#.<prec>g
4054 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004055 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004056 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004057
4058 'f' formats:
4059 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4060 len = 1 + 50 + 1 + prec = 52 + prec
4061
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004062 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004063 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004064
4065 */
Guido van Rossumb5a755e2007-07-18 18:15:48 +00004066 if (((type == 'g' || type == 'G') &&
4067 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004068 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004069 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004070 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004071 return -1;
4072 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004073 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4074 (flags&F_ALT) ? "#" : "",
4075 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004076 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004077 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004078}
4079
Tim Peters38fd5b62000-09-21 05:43:11 +00004080/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4081 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4082 * Python's regular ints.
4083 * Return value: a new PyString*, or NULL if error.
4084 * . *pbuf is set to point into it,
4085 * *plen set to the # of chars following that.
4086 * Caller must decref it when done using pbuf.
4087 * The string starting at *pbuf is of the form
4088 * "-"? ("0x" | "0X")? digit+
4089 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004090 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004091 * There will be at least prec digits, zero-filled on the left if
4092 * necessary to get that many.
4093 * val object to be converted
4094 * flags bitmask of format flags; only F_ALT is looked at
4095 * prec minimum number of digits; 0-fill on left if needed
4096 * type a character in [duoxX]; u acts the same as d
4097 *
4098 * CAUTION: o, x and X conversions on regular ints can never
4099 * produce a '-' sign, but can for Python's unbounded ints.
4100 */
4101PyObject*
4102_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4103 char **pbuf, int *plen)
4104{
4105 PyObject *result = NULL;
4106 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004107 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004108 int sign; /* 1 if '-', else 0 */
4109 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004110 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004111 int numdigits; /* len == numnondigits + numdigits */
4112 int numnondigits = 0;
4113
Guido van Rossumddefaf32007-01-14 03:31:43 +00004114 /* Avoid exceeding SSIZE_T_MAX */
4115 if (prec > PY_SSIZE_T_MAX-3) {
4116 PyErr_SetString(PyExc_OverflowError,
4117 "precision too large");
4118 return NULL;
4119 }
4120
Tim Peters38fd5b62000-09-21 05:43:11 +00004121 switch (type) {
4122 case 'd':
4123 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00004124 /* Special-case boolean: we want 0/1 */
4125 if (PyBool_Check(val))
4126 result = PyNumber_ToBase(val, 10);
4127 else
4128 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004129 break;
4130 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004131 numnondigits = 2;
4132 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00004133 break;
4134 case 'x':
4135 case 'X':
4136 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004137 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00004138 break;
4139 default:
4140 assert(!"'type' not in [duoxX]");
4141 }
4142 if (!result)
4143 return NULL;
4144
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004145 buf = PyString_AsString(result);
4146 if (!buf) {
4147 Py_DECREF(result);
4148 return NULL;
4149 }
4150
Tim Peters38fd5b62000-09-21 05:43:11 +00004151 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004152 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004153 PyErr_BadInternalCall();
4154 return NULL;
4155 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004156 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004157 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004158 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4159 return NULL;
4160 }
4161 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004162 if (buf[len-1] == 'L') {
4163 --len;
4164 buf[len] = '\0';
4165 }
4166 sign = buf[0] == '-';
4167 numnondigits += sign;
4168 numdigits = len - numnondigits;
4169 assert(numdigits > 0);
4170
Tim Petersfff53252001-04-12 18:38:48 +00004171 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004172 if (((flags & F_ALT) == 0 &&
4173 (type == 'o' || type == 'x' || type == 'X'))) {
4174 assert(buf[sign] == '0');
4175 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
4176 buf[sign+1] == 'o');
4177 numnondigits -= 2;
4178 buf += 2;
4179 len -= 2;
4180 if (sign)
4181 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00004182 assert(len == numnondigits + numdigits);
4183 assert(numdigits > 0);
4184 }
4185
4186 /* Fill with leading zeroes to meet minimum width. */
4187 if (prec > numdigits) {
4188 PyObject *r1 = PyString_FromStringAndSize(NULL,
4189 numnondigits + prec);
4190 char *b1;
4191 if (!r1) {
4192 Py_DECREF(result);
4193 return NULL;
4194 }
4195 b1 = PyString_AS_STRING(r1);
4196 for (i = 0; i < numnondigits; ++i)
4197 *b1++ = *buf++;
4198 for (i = 0; i < prec - numdigits; i++)
4199 *b1++ = '0';
4200 for (i = 0; i < numdigits; i++)
4201 *b1++ = *buf++;
4202 *b1 = '\0';
4203 Py_DECREF(result);
4204 result = r1;
4205 buf = PyString_AS_STRING(result);
4206 len = numnondigits + prec;
4207 }
4208
4209 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004210 if (type == 'X') {
4211 /* Need to convert all lower case letters to upper case.
4212 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004213 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004214 if (buf[i] >= 'a' && buf[i] <= 'x')
4215 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004216 }
4217 *pbuf = buf;
4218 *plen = len;
4219 return result;
4220}
4221
Thomas Wouters477c8d52006-05-27 19:21:47 +00004222Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004223formatint(char *buf, size_t buflen, int flags,
4224 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004225{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004226 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004227 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4228 + 1 + 1 = 24 */
4229 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004230 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004231 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004232
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004233 x = PyInt_AsLong(v);
4234 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004235 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004236 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004237 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004238 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004239 if (x < 0 && type == 'u') {
4240 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004241 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004242 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4243 sign = "-";
4244 else
4245 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004246 if (prec < 0)
4247 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004248
4249 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004250 (type == 'x' || type == 'X' || type == 'o')) {
4251 /* When converting under %#o, %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004252 * of issues that cause pain:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004253 * - for %#o, we want a different base marker than C
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004254 * - when 0 is being converted, the C standard leaves off
4255 * the '0x' or '0X', which is inconsistent with other
4256 * %#x/%#X conversions and inconsistent with Python's
4257 * hex() function
4258 * - there are platforms that violate the standard and
4259 * convert 0 with the '0x' or '0X'
4260 * (Metrowerks, Compaq Tru64)
4261 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004262 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004263 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004264 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004265 * We can achieve the desired consistency by inserting our
4266 * own '0x' or '0X' prefix, and substituting %x/%X in place
4267 * of %#x/%#X.
4268 *
4269 * Note that this is the same approach as used in
4270 * formatint() in unicodeobject.c
4271 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004272 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4273 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004274 }
4275 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004276 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4277 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004278 prec, type);
4279 }
4280
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004281 /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004282 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004283 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004284 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004285 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004286 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004287 return -1;
4288 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004289 if (sign[0])
4290 PyOS_snprintf(buf, buflen, fmt, -x);
4291 else
4292 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004293 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004294}
4295
Thomas Wouters477c8d52006-05-27 19:21:47 +00004296Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004297formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004298{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004299 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004300 if (PyString_Check(v)) {
4301 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004302 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004303 }
4304 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004305 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004306 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004307 }
4308 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004309 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004310}
4311
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004312/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4313
4314 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4315 chars are formatted. XXX This is a magic number. Each formatting
4316 routine does bounds checking to ensure no overflow, but a better
4317 solution may be to malloc a buffer of appropriate size for each
4318 format. For now, the current solution is sufficient.
4319*/
4320#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004321
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004322PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004323PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004324{
4325 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004326 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004327 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004328 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004329 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004330 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004331 PyObject *dict = NULL;
4332 if (format == NULL || !PyString_Check(format) || args == NULL) {
4333 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004334 return NULL;
4335 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004336 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004337 fmt = PyString_AS_STRING(format);
4338 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004339 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004340 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004341 if (result == NULL)
4342 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004343 res = PyString_AsString(result);
4344 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004345 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004346 argidx = 0;
4347 }
4348 else {
4349 arglen = -1;
4350 argidx = -2;
4351 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004352 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004353 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004354 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004355 while (--fmtcnt >= 0) {
4356 if (*fmt != '%') {
4357 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004358 rescnt = fmtcnt + 100;
4359 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004360 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004361 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004362 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004363 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004364 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004365 }
4366 *res++ = *fmt++;
4367 }
4368 else {
4369 /* Got a format specifier */
4370 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004371 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004372 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004373 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004374 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004375 PyObject *v = NULL;
4376 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004377 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004378 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004379 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004380 char formatbuf[FORMATBUFLEN];
4381 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00004382 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004383 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004384
Guido van Rossumda9c2711996-12-05 21:58:58 +00004385 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004386 if (*fmt == '(') {
4387 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004388 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004389 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004390 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004391
4392 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004393 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004394 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004395 goto error;
4396 }
4397 ++fmt;
4398 --fmtcnt;
4399 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004400 /* Skip over balanced parentheses */
4401 while (pcount > 0 && --fmtcnt >= 0) {
4402 if (*fmt == ')')
4403 --pcount;
4404 else if (*fmt == '(')
4405 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004406 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004407 }
4408 keylen = fmt - keystart - 1;
4409 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004410 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004411 "incomplete format key");
4412 goto error;
4413 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004414 key = PyString_FromStringAndSize(keystart,
4415 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004416 if (key == NULL)
4417 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004418 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004419 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004420 args_owned = 0;
4421 }
4422 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004423 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004424 if (args == NULL) {
4425 goto error;
4426 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004427 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004428 arglen = -1;
4429 argidx = -2;
4430 }
Guido van Rossume5372401993-03-16 12:15:04 +00004431 while (--fmtcnt >= 0) {
4432 switch (c = *fmt++) {
4433 case '-': flags |= F_LJUST; continue;
4434 case '+': flags |= F_SIGN; continue;
4435 case ' ': flags |= F_BLANK; continue;
4436 case '#': flags |= F_ALT; continue;
4437 case '0': flags |= F_ZERO; continue;
4438 }
4439 break;
4440 }
4441 if (c == '*') {
4442 v = getnextarg(args, arglen, &argidx);
4443 if (v == NULL)
4444 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004445 if (!PyInt_Check(v)) {
4446 PyErr_SetString(PyExc_TypeError,
4447 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004448 goto error;
4449 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004450 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004451 if (width == -1 && PyErr_Occurred())
4452 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004453 if (width < 0) {
4454 flags |= F_LJUST;
4455 width = -width;
4456 }
Guido van Rossume5372401993-03-16 12:15:04 +00004457 if (--fmtcnt >= 0)
4458 c = *fmt++;
4459 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004460 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004461 width = c - '0';
4462 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004463 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004464 if (!isdigit(c))
4465 break;
4466 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004467 PyErr_SetString(
4468 PyExc_ValueError,
4469 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004470 goto error;
4471 }
4472 width = width*10 + (c - '0');
4473 }
4474 }
4475 if (c == '.') {
4476 prec = 0;
4477 if (--fmtcnt >= 0)
4478 c = *fmt++;
4479 if (c == '*') {
4480 v = getnextarg(args, arglen, &argidx);
4481 if (v == NULL)
4482 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004483 if (!PyInt_Check(v)) {
4484 PyErr_SetString(
4485 PyExc_TypeError,
4486 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004487 goto error;
4488 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004489 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004490 if (prec == -1 && PyErr_Occurred())
4491 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004492 if (prec < 0)
4493 prec = 0;
4494 if (--fmtcnt >= 0)
4495 c = *fmt++;
4496 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004497 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004498 prec = c - '0';
4499 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004500 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004501 if (!isdigit(c))
4502 break;
4503 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004504 PyErr_SetString(
4505 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004506 "prec too big");
4507 goto error;
4508 }
4509 prec = prec*10 + (c - '0');
4510 }
4511 }
4512 } /* prec */
4513 if (fmtcnt >= 0) {
4514 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004515 if (--fmtcnt >= 0)
4516 c = *fmt++;
4517 }
4518 }
4519 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 PyErr_SetString(PyExc_ValueError,
4521 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004522 goto error;
4523 }
4524 if (c != '%') {
4525 v = getnextarg(args, arglen, &argidx);
4526 if (v == NULL)
4527 goto error;
4528 }
4529 sign = 0;
4530 fill = ' ';
4531 switch (c) {
4532 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004533 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004534 len = 1;
4535 break;
4536 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00004537 if (PyUnicode_Check(v)) {
4538 fmt = fmt_start;
4539 argidx = argidx_start;
4540 goto unicode;
4541 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00004542 temp = _PyObject_Str(v);
4543 if (temp != NULL && PyUnicode_Check(temp)) {
4544 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004545 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004546 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004547 goto unicode;
4548 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004549 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004550 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004551 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00004552 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004553 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004554 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004555 if (!PyString_Check(temp)) {
4556 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00004557 "%s argument has non-string str()/repr()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004558 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004559 goto error;
4560 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004561 pbuf = PyString_AS_STRING(temp);
4562 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004563 if (prec >= 0 && len > prec)
4564 len = prec;
4565 break;
4566 case 'i':
4567 case 'd':
4568 case 'u':
4569 case 'o':
4570 case 'x':
4571 case 'X':
4572 if (c == 'i')
4573 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004574 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004575 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004576 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004577 prec, c, &pbuf, &ilen);
4578 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004579 if (!temp)
4580 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004581 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004582 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004583 else {
4584 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004585 len = formatint(pbuf,
4586 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004587 flags, prec, c, v);
4588 if (len < 0)
4589 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004590 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004591 }
4592 if (flags & F_ZERO)
4593 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004594 break;
4595 case 'e':
4596 case 'E':
4597 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004598 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004599 case 'g':
4600 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004601 if (c == 'F')
4602 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004603 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004604 len = formatfloat(pbuf, sizeof(formatbuf),
4605 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004606 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004607 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004608 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004609 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004610 fill = '0';
4611 break;
4612 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004613 if (PyUnicode_Check(v)) {
4614 fmt = fmt_start;
4615 argidx = argidx_start;
4616 goto unicode;
4617 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004618 pbuf = formatbuf;
4619 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004620 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004621 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004622 break;
4623 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004624 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004625 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004626 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004627 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004628 (Py_ssize_t)(fmt - 1 -
4629 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004630 goto error;
4631 }
4632 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004633 if (*pbuf == '-' || *pbuf == '+') {
4634 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004635 len--;
4636 }
4637 else if (flags & F_SIGN)
4638 sign = '+';
4639 else if (flags & F_BLANK)
4640 sign = ' ';
4641 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004642 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004643 }
4644 if (width < len)
4645 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004646 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004647 reslen -= rescnt;
4648 rescnt = width + fmtcnt + 100;
4649 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004650 if (reslen < 0) {
4651 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004652 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004653 return PyErr_NoMemory();
4654 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004655 if (_PyString_Resize(&result, reslen) < 0) {
4656 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004657 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004658 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004659 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004660 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004661 }
4662 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004663 if (fill != ' ')
4664 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004665 rescnt--;
4666 if (width > len)
4667 width--;
4668 }
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004669 if ((flags & F_ALT) &&
4670 (c == 'x' || c == 'X' || c == 'o')) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004671 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004672 assert(pbuf[1] == c);
4673 if (fill != ' ') {
4674 *res++ = *pbuf++;
4675 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004676 }
Tim Petersfff53252001-04-12 18:38:48 +00004677 rescnt -= 2;
4678 width -= 2;
4679 if (width < 0)
4680 width = 0;
4681 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004682 }
4683 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004684 do {
4685 --rescnt;
4686 *res++ = fill;
4687 } while (--width > len);
4688 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004689 if (fill == ' ') {
4690 if (sign)
4691 *res++ = sign;
4692 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004693 (c == 'x' || c == 'X' || c == 'o')) {
Tim Petersfff53252001-04-12 18:38:48 +00004694 assert(pbuf[0] == '0');
4695 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004696 *res++ = *pbuf++;
4697 *res++ = *pbuf++;
4698 }
4699 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004700 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004701 res += len;
4702 rescnt -= len;
4703 while (--width >= len) {
4704 --rescnt;
4705 *res++ = ' ';
4706 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004707 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004708 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004709 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004710 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004711 goto error;
4712 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004713 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004714 } /* '%' */
4715 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004716 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004717 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004718 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004719 goto error;
4720 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004721 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004722 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004723 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004724 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004725 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004726
4727 unicode:
4728 if (args_owned) {
4729 Py_DECREF(args);
4730 args_owned = 0;
4731 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004732 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004733 if (PyTuple_Check(orig_args) && argidx > 0) {
4734 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004735 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004736 v = PyTuple_New(n);
4737 if (v == NULL)
4738 goto error;
4739 while (--n >= 0) {
4740 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4741 Py_INCREF(w);
4742 PyTuple_SET_ITEM(v, n, w);
4743 }
4744 args = v;
4745 } else {
4746 Py_INCREF(orig_args);
4747 args = orig_args;
4748 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004749 args_owned = 1;
4750 /* Take what we have of the result and let the Unicode formatting
4751 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004752 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004753 if (_PyString_Resize(&result, rescnt))
4754 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004755 fmtcnt = PyString_GET_SIZE(format) - \
4756 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004757 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4758 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004759 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004760 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004761 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004762 if (v == NULL)
4763 goto error;
4764 /* Paste what we have (result) to what the Unicode formatting
4765 function returned (v) and return the result (or error) */
4766 w = PyUnicode_Concat(result, v);
4767 Py_DECREF(result);
4768 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004769 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004770 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004771
Guido van Rossume5372401993-03-16 12:15:04 +00004772 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004773 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004774 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004775 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004776 }
Guido van Rossume5372401993-03-16 12:15:04 +00004777 return NULL;
4778}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004779
Guido van Rossum2a61e741997-01-18 07:55:05 +00004780void
Fred Drakeba096332000-07-09 07:04:36 +00004781PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004782{
4783 register PyStringObject *s = (PyStringObject *)(*p);
4784 PyObject *t;
4785 if (s == NULL || !PyString_Check(s))
4786 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004787 /* If it's a string subclass, we don't really know what putting
4788 it in the interned dict might do. */
4789 if (!PyString_CheckExact(s))
4790 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004791 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004792 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004793 if (interned == NULL) {
4794 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004795 if (interned == NULL) {
4796 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004797 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004798 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004799 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004800 t = PyDict_GetItem(interned, (PyObject *)s);
4801 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004802 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004803 Py_DECREF(*p);
4804 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004805 return;
4806 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004807
Armin Rigo79f7ad22004-08-07 19:27:39 +00004808 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004809 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004810 return;
4811 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004812 /* The two references in interned are not counted by refcnt.
4813 The string deallocator will take care of this */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004814 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004815 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004816}
4817
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004818void
4819PyString_InternImmortal(PyObject **p)
4820{
4821 PyString_InternInPlace(p);
4822 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4823 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4824 Py_INCREF(*p);
4825 }
4826}
4827
Guido van Rossum2a61e741997-01-18 07:55:05 +00004828
4829PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004830PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004831{
4832 PyObject *s = PyString_FromString(cp);
4833 if (s == NULL)
4834 return NULL;
4835 PyString_InternInPlace(&s);
4836 return s;
4837}
4838
Guido van Rossum8cf04761997-08-02 02:57:45 +00004839void
Fred Drakeba096332000-07-09 07:04:36 +00004840PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004841{
4842 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004843 for (i = 0; i < UCHAR_MAX + 1; i++) {
4844 Py_XDECREF(characters[i]);
4845 characters[i] = NULL;
4846 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004847 Py_XDECREF(nullstring);
4848 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004849}
Barry Warsawa903ad982001-02-23 16:40:48 +00004850
Barry Warsawa903ad982001-02-23 16:40:48 +00004851void _Py_ReleaseInternedStrings(void)
4852{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004853 PyObject *keys;
4854 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004855 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004856 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004857
4858 if (interned == NULL || !PyDict_Check(interned))
4859 return;
4860 keys = PyDict_Keys(interned);
4861 if (keys == NULL || !PyList_Check(keys)) {
4862 PyErr_Clear();
4863 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004864 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004865
4866 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4867 detector, interned strings are not forcibly deallocated; rather, we
4868 give them their stolen references back, and then clear and DECREF
4869 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004870
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004871 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004872 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4873 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004874 for (i = 0; i < n; i++) {
4875 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4876 switch (s->ob_sstate) {
4877 case SSTATE_NOT_INTERNED:
4878 /* XXX Shouldn't happen */
4879 break;
4880 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004881 Py_Refcnt(s) += 1;
4882 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004883 break;
4884 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004885 Py_Refcnt(s) += 2;
4886 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004887 break;
4888 default:
4889 Py_FatalError("Inconsistent interned string state.");
4890 }
4891 s->ob_sstate = SSTATE_NOT_INTERNED;
4892 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004893 fprintf(stderr, "total size of all interned strings: "
4894 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4895 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004896 Py_DECREF(keys);
4897 PyDict_Clear(interned);
4898 Py_DECREF(interned);
4899 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004900}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004901
4902
4903/*********************** Str Iterator ****************************/
4904
4905typedef struct {
4906 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00004907 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004908 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4909} striterobject;
4910
4911static void
4912striter_dealloc(striterobject *it)
4913{
4914 _PyObject_GC_UNTRACK(it);
4915 Py_XDECREF(it->it_seq);
4916 PyObject_GC_Del(it);
4917}
4918
4919static int
4920striter_traverse(striterobject *it, visitproc visit, void *arg)
4921{
4922 Py_VISIT(it->it_seq);
4923 return 0;
4924}
4925
4926static PyObject *
4927striter_next(striterobject *it)
4928{
4929 PyStringObject *seq;
4930 PyObject *item;
4931
4932 assert(it != NULL);
4933 seq = it->it_seq;
4934 if (seq == NULL)
4935 return NULL;
4936 assert(PyString_Check(seq));
4937
4938 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004939 item = PyString_FromStringAndSize(
4940 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004941 if (item != NULL)
4942 ++it->it_index;
4943 return item;
4944 }
4945
4946 Py_DECREF(seq);
4947 it->it_seq = NULL;
4948 return NULL;
4949}
4950
4951static PyObject *
4952striter_len(striterobject *it)
4953{
4954 Py_ssize_t len = 0;
4955 if (it->it_seq)
4956 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
4957 return PyInt_FromSsize_t(len);
4958}
4959
Guido van Rossum49d6b072006-08-17 21:11:47 +00004960PyDoc_STRVAR(length_hint_doc,
4961 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004962
4963static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004964 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
4965 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004966 {NULL, NULL} /* sentinel */
4967};
4968
4969PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004970 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum49d6b072006-08-17 21:11:47 +00004971 "striterator", /* tp_name */
4972 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004973 0, /* tp_itemsize */
4974 /* methods */
4975 (destructor)striter_dealloc, /* tp_dealloc */
4976 0, /* tp_print */
4977 0, /* tp_getattr */
4978 0, /* tp_setattr */
4979 0, /* tp_compare */
4980 0, /* tp_repr */
4981 0, /* tp_as_number */
4982 0, /* tp_as_sequence */
4983 0, /* tp_as_mapping */
4984 0, /* tp_hash */
4985 0, /* tp_call */
4986 0, /* tp_str */
4987 PyObject_GenericGetAttr, /* tp_getattro */
4988 0, /* tp_setattro */
4989 0, /* tp_as_buffer */
4990 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4991 0, /* tp_doc */
4992 (traverseproc)striter_traverse, /* tp_traverse */
4993 0, /* tp_clear */
4994 0, /* tp_richcompare */
4995 0, /* tp_weaklistoffset */
4996 PyObject_SelfIter, /* tp_iter */
4997 (iternextfunc)striter_next, /* tp_iternext */
4998 striter_methods, /* tp_methods */
4999 0,
5000};
5001
5002static PyObject *
5003str_iter(PyObject *seq)
5004{
5005 striterobject *it;
5006
5007 if (!PyString_Check(seq)) {
5008 PyErr_BadInternalCall();
5009 return NULL;
5010 }
5011 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5012 if (it == NULL)
5013 return NULL;
5014 it->it_index = 0;
5015 Py_INCREF(seq);
5016 it->it_seq = (PyStringObject *)seq;
5017 _PyObject_GC_TRACK(it);
5018 return (PyObject *)it;
5019}