blob: d26381251d948b59b2c8b2ffae5f5dade97afc6d [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000417 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000490 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000510 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000522 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000609 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 && isxdigit(Py_CHARMASK(s[1]))) {
611 unsigned int x = 0;
612 c = Py_CHARMASK(*s);
613 s++;
614 if (isdigit(c))
615 x = c - '0';
616 else if (islower(c))
617 x = 10 + c - 'a';
618 else
619 x = 10 + c - 'A';
620 x = x << 4;
621 c = Py_CHARMASK(*s);
622 s++;
623 if (isdigit(c))
624 x += c - '0';
625 else if (islower(c))
626 x += 10 + c - 'a';
627 else
628 x += 10 + c - 'A';
629 *p++ = x;
630 break;
631 }
632 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000633 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000635 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000636 }
637 if (strcmp(errors, "replace") == 0) {
638 *p++ = '?';
639 } else if (strcmp(errors, "ignore") == 0)
640 /* do nothing */;
641 else {
642 PyErr_Format(PyExc_ValueError,
643 "decoding error; "
644 "unknown error handling code: %.400s",
645 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000646 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 default:
649 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000650 s--;
651 goto non_esc; /* an arbitry number of unescaped
652 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 }
654 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000655 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 return v;
658 failed:
659 Py_DECREF(v);
660 return NULL;
661}
662
Thomas Wouters477c8d52006-05-27 19:21:47 +0000663/* -------------------------------------------------------------------- */
664/* object api */
665
Martin v. Löwis18e16552006-02-15 17:27:45 +0000666static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667string_getsize(register PyObject *op)
668{
669 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000670 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000671 if (PyString_AsStringAndSize(op, &s, &len))
672 return -1;
673 return len;
674}
675
676static /*const*/ char *
677string_getbuffer(register PyObject *op)
678{
679 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000680 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000681 if (PyString_AsStringAndSize(op, &s, &len))
682 return NULL;
683 return s;
684}
685
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000687PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000688{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000689 if (PyUnicode_Check(op)) {
690 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
691 if (!op)
692 return -1;
693 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694 if (!PyString_Check(op))
695 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000696 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697}
698
699/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000700PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000702 if (PyUnicode_Check(op)) {
703 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
704 if (!op)
705 return NULL;
706 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (!PyString_Check(op))
708 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000709 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000710}
711
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712int
713PyString_AsStringAndSize(register PyObject *obj,
714 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716{
717 if (s == NULL) {
718 PyErr_BadInternalCall();
719 return -1;
720 }
721
722 if (!PyString_Check(obj)) {
723 if (PyUnicode_Check(obj)) {
724 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
725 if (obj == NULL)
726 return -1;
727 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000728 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000729 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000731 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000732 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733 return -1;
734 }
735 }
736
737 *s = PyString_AS_STRING(obj);
738 if (len != NULL)
739 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000740 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 PyErr_SetString(PyExc_TypeError,
742 "expected string without null bytes");
743 return -1;
744 }
745 return 0;
746}
747
Thomas Wouters477c8d52006-05-27 19:21:47 +0000748/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749/* Methods */
750
Thomas Wouters477c8d52006-05-27 19:21:47 +0000751#define STRINGLIB_CHAR char
752
753#define STRINGLIB_CMP memcmp
754#define STRINGLIB_LEN PyString_GET_SIZE
755#define STRINGLIB_NEW PyString_FromStringAndSize
756#define STRINGLIB_STR PyString_AS_STRING
757
758#define STRINGLIB_EMPTY nullstring
759
760#include "stringlib/fastsearch.h"
761
762#include "stringlib/count.h"
763#include "stringlib/find.h"
764#include "stringlib/partition.h"
765
766
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000767PyObject *
768PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000770 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000771 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis5b222132007-06-10 09:51:05 +0000772 Py_ssize_t length = PyString_GET_SIZE(op);
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000773 size_t newsize = 3 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000774 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000775 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000776 PyErr_SetString(PyExc_OverflowError,
777 "string is too large to make repr");
778 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000779 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000780 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000781 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782 }
783 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000784 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000785 register Py_UNICODE c;
786 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000787 int quote;
788
Thomas Wouters7e474022000-07-16 12:04:32 +0000789 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000790 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000791 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000792 char *test, *start;
793 start = PyString_AS_STRING(op);
794 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000795 if (*test == '"') {
796 quote = '\''; /* switch back to single quote */
797 goto decided;
798 }
799 else if (*test == '\'')
800 quote = '"';
801 }
802 decided:
803 ;
804 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000805
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000806 *p++ = 's', *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000807 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000808 /* There's at least enough room for a hex escape
809 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000810 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000812 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000814 else if (c == '\t')
815 *p++ = '\\', *p++ = 't';
816 else if (c == '\n')
817 *p++ = '\\', *p++ = 'n';
818 else if (c == '\r')
819 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000820 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000821 *p++ = '\\';
822 *p++ = 'x';
823 *p++ = hexdigits[(c & 0xf0) >> 4];
824 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000825 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 else
827 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000829 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000832 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
833 Py_DECREF(v);
834 return NULL;
835 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000836 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838}
839
Guido van Rossum189f1df2001-05-01 16:51:53 +0000840static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841string_repr(PyObject *op)
842{
843 return PyString_Repr(op, 1);
844}
845
846static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000847string_str(PyObject *s)
848{
Tim Petersc9933152001-10-16 20:18:24 +0000849 assert(PyString_Check(s));
850 if (PyString_CheckExact(s)) {
851 Py_INCREF(s);
852 return s;
853 }
854 else {
855 /* Subtype -- return genuine string with the same value. */
856 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000857 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000858 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000859}
860
Martin v. Löwis18e16552006-02-15 17:27:45 +0000861static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000862string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000863{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000864 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865}
866
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000867static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000868string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000870 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000871 register PyStringObject *op;
872 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000873 if (PyUnicode_Check(bb))
874 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000875 if (PyBytes_Check(bb))
876 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000877 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000878 "cannot concatenate 'str8' and '%.200s' objects",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000879 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880 return NULL;
881 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000882#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000883 /* Optimize cases with empty left or right operand */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000884 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000885 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000886 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000887 Py_INCREF(bb);
888 return bb;
889 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890 Py_INCREF(a);
891 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000893 size = Py_Size(a) + Py_Size(b);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000894 if (size < 0) {
895 PyErr_SetString(PyExc_OverflowError,
896 "strings are too large to concat");
897 return NULL;
898 }
899
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000900 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000901 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000902 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000903 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000904 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000905 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000906 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000907 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
908 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000909 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000910 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911#undef b
912}
913
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000915string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000917 register Py_ssize_t i;
918 register Py_ssize_t j;
919 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000920 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000921 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922 if (n < 0)
923 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000924 /* watch out for overflows: the size can overflow int,
925 * and the # of bytes needed can overflow size_t
926 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000927 size = Py_Size(a) * n;
928 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000929 PyErr_SetString(PyExc_OverflowError,
930 "repeated string is too long");
931 return NULL;
932 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000933 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 Py_INCREF(a);
935 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936 }
Tim Peterse7c05322004-06-27 17:24:49 +0000937 nbytes = (size_t)size;
938 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000939 PyErr_SetString(PyExc_OverflowError,
940 "repeated string is too long");
941 return NULL;
942 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000943 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000944 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000945 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000947 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000948 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000949 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000950 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000951 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000952 memset(op->ob_sval, a->ob_sval[0] , n);
953 return (PyObject *) op;
954 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000955 i = 0;
956 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000957 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
958 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000959 }
960 while (i < size) {
961 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000962 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000963 i += j;
964 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966}
967
Guido van Rossum9284a572000-03-07 15:53:43 +0000968static int
Thomas Wouters477c8d52006-05-27 19:21:47 +0000969string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +0000970{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000971 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000972 if (PyUnicode_Check(sub_obj))
973 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000974 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000975 PyErr_Format(PyExc_TypeError,
976 "'in <string>' requires string as left operand, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000977 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000978 return -1;
979 }
Guido van Rossum9284a572000-03-07 15:53:43 +0000980 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000981
Thomas Wouters477c8d52006-05-27 19:21:47 +0000982 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +0000983}
984
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000985static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000986string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000988 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000990 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000991 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992 return NULL;
993 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000994 pchar = a->ob_sval[i];
995 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000996 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000997 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000998 else {
999#ifdef COUNT_ALLOCS
1000 one_strings++;
1001#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001002 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001003 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001004 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001005}
1006
Martin v. Löwiscd353062001-05-24 16:56:35 +00001007static PyObject*
1008string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001009{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001010 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001011 Py_ssize_t len_a, len_b;
1012 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001013 PyObject *result;
1014
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001015 /* Make sure both arguments are strings. */
1016 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001017 result = Py_NotImplemented;
1018 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001019 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001020 if (a == b) {
1021 switch (op) {
1022 case Py_EQ:case Py_LE:case Py_GE:
1023 result = Py_True;
1024 goto out;
1025 case Py_NE:case Py_LT:case Py_GT:
1026 result = Py_False;
1027 goto out;
1028 }
1029 }
1030 if (op == Py_EQ) {
1031 /* Supporting Py_NE here as well does not save
1032 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001033 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001034 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001035 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001036 result = Py_True;
1037 } else {
1038 result = Py_False;
1039 }
1040 goto out;
1041 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001042 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001043 min_len = (len_a < len_b) ? len_a : len_b;
1044 if (min_len > 0) {
1045 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1046 if (c==0)
1047 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001048 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001049 c = 0;
1050 if (c == 0)
1051 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1052 switch (op) {
1053 case Py_LT: c = c < 0; break;
1054 case Py_LE: c = c <= 0; break;
1055 case Py_EQ: assert(0); break; /* unreachable */
1056 case Py_NE: c = c != 0; break;
1057 case Py_GT: c = c > 0; break;
1058 case Py_GE: c = c >= 0; break;
1059 default:
1060 result = Py_NotImplemented;
1061 goto out;
1062 }
1063 result = c ? Py_True : Py_False;
1064 out:
1065 Py_INCREF(result);
1066 return result;
1067}
1068
1069int
1070_PyString_Eq(PyObject *o1, PyObject *o2)
1071{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001072 PyStringObject *a = (PyStringObject*) o1;
1073 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001074 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001075 && *a->ob_sval == *b->ob_sval
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001076 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001077}
1078
Guido van Rossum9bfef441993-03-29 10:43:31 +00001079static long
Fred Drakeba096332000-07-09 07:04:36 +00001080string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001081{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001082 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001083 register unsigned char *p;
1084 register long x;
1085
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001086 if (a->ob_shash != -1)
1087 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001088 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001089 p = (unsigned char *) a->ob_sval;
1090 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001091 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001092 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001093 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001094 if (x == -1)
1095 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001096 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001097 return x;
1098}
1099
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001100static PyObject*
1101string_subscript(PyStringObject* self, PyObject* item)
1102{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001103 if (PyIndex_Check(item)) {
1104 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001105 if (i == -1 && PyErr_Occurred())
1106 return NULL;
1107 if (i < 0)
1108 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001109 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001110 }
1111 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001112 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001113 char* source_buf;
1114 char* result_buf;
1115 PyObject* result;
1116
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001117 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001118 PyString_GET_SIZE(self),
1119 &start, &stop, &step, &slicelength) < 0) {
1120 return NULL;
1121 }
1122
1123 if (slicelength <= 0) {
1124 return PyString_FromStringAndSize("", 0);
1125 }
Thomas Woutersed03b412007-08-28 21:37:11 +00001126 else if (start == 0 && step == 1 &&
1127 slicelength == PyString_GET_SIZE(self) &&
1128 PyString_CheckExact(self)) {
1129 Py_INCREF(self);
1130 return (PyObject *)self;
1131 }
1132 else if (step == 1) {
1133 return PyString_FromStringAndSize(
1134 PyString_AS_STRING(self) + start,
1135 slicelength);
1136 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001137 else {
1138 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001139 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001140 if (result_buf == NULL)
1141 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001142
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001143 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001144 cur += step, i++) {
1145 result_buf[i] = source_buf[cur];
1146 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001147
1148 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001149 slicelength);
1150 PyMem_Free(result_buf);
1151 return result;
1152 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001153 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001154 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001155 PyErr_Format(PyExc_TypeError,
1156 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001157 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001158 return NULL;
1159 }
1160}
1161
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001162static int
1163string_buffer_getbuffer(PyStringObject *self, PyBuffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001164{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001165 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +00001166}
1167
/* Sequence protocol table for the string type.  Slots left at 0 are
   operations this table does not provide: slicing is not wired up here
   (string_subscript handles slice objects), and no item or slice
   assignment functions are supplied. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    0, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1178
/* Mapping protocol table for the string type: length and subscription
   only; the assignment slot is left empty. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length, /*mp_length*/
    (binaryfunc)string_subscript, /*mp_subscript*/
    0, /*mp_ass_subscript*/
};
1184
/* Buffer protocol table for the string type: only a get-buffer function
   is provided; the second slot is left NULL. */
static PyBufferProcs string_as_buffer = {
    (getbufferproc)string_buffer_getbuffer, /*bf_getbuffer*/
    NULL, /*bf_releasebuffer*/
};
1189
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001190
/* Strip modes; each value is also the index of the matching entry in
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip modes above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001199
Thomas Wouters477c8d52006-05-27 19:21:47 +00001200
/* True when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared before memcmp() is run on the bytes in between.

   Don't call if length < 2: the memcmp() count is length-2.

   Every parameter is parenthesized so that expression arguments (for
   example a conditional expression as the offset) expand correctly.
   Arguments are evaluated more than once, so they must not have side
   effects. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1206
1207
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The parameter is parenthesized so that expression
   arguments expand correctly; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1220
/* Create a string from data[left:right] and append it to `list`.
   Relies on names from the expanding function: the PyObject pointers
   `str` and `list`, and an `onError` label that is jumped to when the
   string cannot be created or the append fails.  The temporary reference
   held in `str` is released on both the success and the failure path.
   This is a bare statement sequence (no enclosing braces), so it must
   not be used as the unbraced body of an if/else or loop. */
#define SPLIT_APPEND(data, left, right) \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (PyList_Append(list, str)) { \
        Py_DECREF(str); \
        goto onError; \
    } \
    else \
        Py_DECREF(str);
1232
/* Create a string from data[left:right] and store it as element number
   `count` of `list`, then increment `count`.
   While count < MAX_PREALLOC the string is written directly into the
   list with PyList_SET_ITEM and no Py_DECREF follows; from then on it is
   added with PyList_Append and the temporary reference is released.
   Relies on names from the expanding function: `str`, `list`, `count`
   and an `onError` label that is jumped to on failure. */
#define SPLIT_ADD(data, left, right) { \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (count < MAX_PREALLOC) { \
        PyList_SET_ITEM(list, count, str); \
    } else { \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        else \
            Py_DECREF(str); \
    } \
    count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001249
/* Always force the list to the expected size: overwrite the list's size
   field with `count`, the number of elements SPLIT_ADD has stored.
   Relies on a `count` variable in the expanding function. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Thomas Wouters477c8d52006-05-27 19:21:47 +00001252
/* Scanning helpers over the byte buffer `s`, moving the index variable
   `i` in place (so `i` must be a modifiable lvalue).
   SKIP_SPACE / SKIP_NONSPACE advance i while i < len and s[i] is /
   is not whitespace.  RSKIP_SPACE / RSKIP_NONSPACE move i backwards
   while i >= 0 and s[i] is / is not whitespace, so i can end at -1.
   Parameters are parenthesized so that expression arguments expand
   correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1257
1258Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001259split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001261 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001262 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001263 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001264
1265 if (list == NULL)
1266 return NULL;
1267
Thomas Wouters477c8d52006-05-27 19:21:47 +00001268 i = j = 0;
1269
1270 while (maxsplit-- > 0) {
1271 SKIP_SPACE(s, i, len);
1272 if (i==len) break;
1273 j = i; i++;
1274 SKIP_NONSPACE(s, i, len);
1275 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001276 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277
1278 if (i < len) {
1279 /* Only occurs when maxsplit was reached */
1280 /* Skip any remaining whitespace and copy to end of string */
1281 SKIP_SPACE(s, i, len);
1282 if (i != len)
1283 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001284 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001285 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001287 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288 Py_DECREF(list);
1289 return NULL;
1290}
1291
Thomas Wouters477c8d52006-05-27 19:21:47 +00001292Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001293split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001294{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001295 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001296 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001297 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001298
1299 if (list == NULL)
1300 return NULL;
1301
Thomas Wouters477c8d52006-05-27 19:21:47 +00001302 i = j = 0;
1303 while ((j < len) && (maxcount-- > 0)) {
1304 for(; j<len; j++) {
1305 /* I found that using memchr makes no difference */
1306 if (s[j] == ch) {
1307 SPLIT_ADD(s, i, j);
1308 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001309 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001310 }
1311 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001312 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001313 if (i <= len) {
1314 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001315 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001316 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001317 return list;
1318
1319 onError:
1320 Py_DECREF(list);
1321 return NULL;
1322}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001324PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001325"S.split([sep [,maxsplit]]) -> list of strings\n\
1326\n\
1327Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001328delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001329splits are done. If sep is not specified or is None, any\n\
1330whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001331
1332static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001333string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001335 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001336 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001337 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001338 PyObject *list, *str, *subobj = Py_None;
1339#ifdef USE_FAST
1340 Py_ssize_t pos;
1341#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001343 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001345 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001346 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001347 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 if (PyString_Check(subobj)) {
1350 sub = PyString_AS_STRING(subobj);
1351 n = PyString_GET_SIZE(subobj);
1352 }
1353 else if (PyUnicode_Check(subobj))
1354 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1355 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1356 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001357
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 if (n == 0) {
1359 PyErr_SetString(PyExc_ValueError, "empty separator");
1360 return NULL;
1361 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001362 else if (n == 1)
1363 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364
Thomas Wouters477c8d52006-05-27 19:21:47 +00001365 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001366 if (list == NULL)
1367 return NULL;
1368
Thomas Wouters477c8d52006-05-27 19:21:47 +00001369#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001370 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001371 while (maxsplit-- > 0) {
1372 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1373 if (pos < 0)
1374 break;
1375 j = i+pos;
1376 SPLIT_ADD(s, i, j);
1377 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001379#else
1380 i = j = 0;
1381 while ((j+n <= len) && (maxsplit-- > 0)) {
1382 for (; j+n <= len; j++) {
1383 if (Py_STRING_MATCH(s, j, sub, n)) {
1384 SPLIT_ADD(s, i, j);
1385 i = j = j + n;
1386 break;
1387 }
1388 }
1389 }
1390#endif
1391 SPLIT_ADD(s, i, len);
1392 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393 return list;
1394
Thomas Wouters477c8d52006-05-27 19:21:47 +00001395 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396 Py_DECREF(list);
1397 return NULL;
1398}
1399
Thomas Wouters477c8d52006-05-27 19:21:47 +00001400PyDoc_STRVAR(partition__doc__,
1401"S.partition(sep) -> (head, sep, tail)\n\
1402\n\
1403Searches for the separator sep in S, and returns the part before it,\n\
1404the separator itself, and the part after it. If the separator is not\n\
1405found, returns S and two empty strings.");
1406
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001407static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001408string_partition(PyStringObject *self, PyObject *sep_obj)
1409{
1410 const char *sep;
1411 Py_ssize_t sep_len;
1412
1413 if (PyString_Check(sep_obj)) {
1414 sep = PyString_AS_STRING(sep_obj);
1415 sep_len = PyString_GET_SIZE(sep_obj);
1416 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001417 else if (PyUnicode_Check(sep_obj))
1418 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001419 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1420 return NULL;
1421
1422 return stringlib_partition(
1423 (PyObject*) self,
1424 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1425 sep_obj, sep, sep_len
1426 );
1427}
1428
1429PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001430"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001431\n\
1432Searches for the separator sep in S, starting at the end of S, and returns\n\
1433the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001434separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001435
1436static PyObject *
1437string_rpartition(PyStringObject *self, PyObject *sep_obj)
1438{
1439 const char *sep;
1440 Py_ssize_t sep_len;
1441
1442 if (PyString_Check(sep_obj)) {
1443 sep = PyString_AS_STRING(sep_obj);
1444 sep_len = PyString_GET_SIZE(sep_obj);
1445 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001446 else if (PyUnicode_Check(sep_obj))
1447 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001448 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1449 return NULL;
1450
1451 return stringlib_rpartition(
1452 (PyObject*) self,
1453 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1454 sep_obj, sep, sep_len
1455 );
1456}
1457
1458Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001459rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001460{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001461 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001462 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001463 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001464
1465 if (list == NULL)
1466 return NULL;
1467
Thomas Wouters477c8d52006-05-27 19:21:47 +00001468 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001469
Thomas Wouters477c8d52006-05-27 19:21:47 +00001470 while (maxsplit-- > 0) {
1471 RSKIP_SPACE(s, i);
1472 if (i<0) break;
1473 j = i; i--;
1474 RSKIP_NONSPACE(s, i);
1475 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001476 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001477 if (i >= 0) {
1478 /* Only occurs when maxsplit was reached */
1479 /* Skip any remaining whitespace and copy to beginning of string */
1480 RSKIP_SPACE(s, i);
1481 if (i >= 0)
1482 SPLIT_ADD(s, 0, i + 1);
1483
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001484 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001485 FIX_PREALLOC_SIZE(list);
1486 if (PyList_Reverse(list) < 0)
1487 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001488 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001489 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001490 Py_DECREF(list);
1491 return NULL;
1492}
1493
Thomas Wouters477c8d52006-05-27 19:21:47 +00001494Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001495rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001496{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001497 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001498 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001499 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001500
1501 if (list == NULL)
1502 return NULL;
1503
Thomas Wouters477c8d52006-05-27 19:21:47 +00001504 i = j = len - 1;
1505 while ((i >= 0) && (maxcount-- > 0)) {
1506 for (; i >= 0; i--) {
1507 if (s[i] == ch) {
1508 SPLIT_ADD(s, i + 1, j + 1);
1509 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001510 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001511 }
1512 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001513 }
1514 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001515 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001516 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001517 FIX_PREALLOC_SIZE(list);
1518 if (PyList_Reverse(list) < 0)
1519 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001520 return list;
1521
1522 onError:
1523 Py_DECREF(list);
1524 return NULL;
1525}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001526
1527PyDoc_STRVAR(rsplit__doc__,
1528"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1529\n\
1530Return a list of the words in the string S, using sep as the\n\
1531delimiter string, starting at the end of the string and working\n\
1532to the front. If maxsplit is given, at most maxsplit splits are\n\
1533done. If sep is not specified or is None, any whitespace string\n\
1534is a separator.");
1535
1536static PyObject *
1537string_rsplit(PyStringObject *self, PyObject *args)
1538{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001539 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001540 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001541 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001542 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001543
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001544 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001545 return NULL;
1546 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001547 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001548 if (subobj == Py_None)
1549 return rsplit_whitespace(s, len, maxsplit);
1550 if (PyString_Check(subobj)) {
1551 sub = PyString_AS_STRING(subobj);
1552 n = PyString_GET_SIZE(subobj);
1553 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001554 else if (PyUnicode_Check(subobj))
1555 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001556 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1557 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001558
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001559 if (n == 0) {
1560 PyErr_SetString(PyExc_ValueError, "empty separator");
1561 return NULL;
1562 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001563 else if (n == 1)
1564 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001565
Thomas Wouters477c8d52006-05-27 19:21:47 +00001566 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001567 if (list == NULL)
1568 return NULL;
1569
1570 j = len;
1571 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001572
Thomas Wouters477c8d52006-05-27 19:21:47 +00001573 while ( (i >= 0) && (maxsplit-- > 0) ) {
1574 for (; i>=0; i--) {
1575 if (Py_STRING_MATCH(s, i, sub, n)) {
1576 SPLIT_ADD(s, i + n, j);
1577 j = i;
1578 i -= n;
1579 break;
1580 }
1581 }
1582 }
1583 SPLIT_ADD(s, 0, j);
1584 FIX_PREALLOC_SIZE(list);
1585 if (PyList_Reverse(list) < 0)
1586 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001587 return list;
1588
Thomas Wouters477c8d52006-05-27 19:21:47 +00001589onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001590 Py_DECREF(list);
1591 return NULL;
1592}
1593
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001595PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596"S.join(sequence) -> string\n\
1597\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001598Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001599sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600
1601static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001602string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603{
1604 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001605 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001606 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001608 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001609 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001610 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001611 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612
Tim Peters19fe14e2001-01-19 03:03:47 +00001613 seq = PySequence_Fast(orig, "");
1614 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001615 return NULL;
1616 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001617
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001618 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001619 if (seqlen == 0) {
1620 Py_DECREF(seq);
1621 return PyString_FromString("");
1622 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001624 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001625 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1626 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001627 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001628 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001629 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001631
Raymond Hettinger674f2412004-08-23 23:23:54 +00001632 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001633 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001634 * Do a pre-pass to figure out the total amount of space we'll
1635 * need (sz), see whether any argument is absurd, and defer to
1636 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001637 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001639 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001640 item = PySequence_Fast_GET_ITEM(seq, i);
1641 if (!PyString_Check(item)){
1642 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001643 /* Defer to Unicode join.
1644 * CAUTION: There's no gurantee that the
1645 * original sequence can be iterated over
1646 * again, so we must pass seq here.
1647 */
1648 PyObject *result;
1649 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001650 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001651 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001652 }
1653 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001654 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001655 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001656 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001657 Py_DECREF(seq);
1658 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001659 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001660 sz += PyString_GET_SIZE(item);
1661 if (i != 0)
1662 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001663 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001664 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001665 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001666 Py_DECREF(seq);
1667 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001669 }
1670
1671 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001673 if (res == NULL) {
1674 Py_DECREF(seq);
1675 return NULL;
1676 }
1677
1678 /* Catenate everything. */
1679 p = PyString_AS_STRING(res);
1680 for (i = 0; i < seqlen; ++i) {
1681 size_t n;
1682 item = PySequence_Fast_GET_ITEM(seq, i);
1683 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001684 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001685 p += n;
1686 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001687 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001688 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001689 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001691
Jeremy Hylton49048292000-07-11 03:28:17 +00001692 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001693 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694}
1695
Tim Peters52e155e2001-06-16 05:42:57 +00001696PyObject *
1697_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001698{
Tim Petersa7259592001-06-16 05:11:17 +00001699 assert(sep != NULL && PyString_Check(sep));
1700 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001701 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001702}
1703
Thomas Wouters477c8d52006-05-27 19:21:47 +00001704Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001705string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001706{
1707 if (*end > len)
1708 *end = len;
1709 else if (*end < 0)
1710 *end += len;
1711 if (*end < 0)
1712 *end = 0;
1713 if (*start < 0)
1714 *start += len;
1715 if (*start < 0)
1716 *start = 0;
1717}
1718
Thomas Wouters477c8d52006-05-27 19:21:47 +00001719Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001720string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001722 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001723 const char *sub;
1724 Py_ssize_t sub_len;
1725 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001726
Thomas Wouters477c8d52006-05-27 19:21:47 +00001727 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1728 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001729 return -2;
1730 if (PyString_Check(subobj)) {
1731 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001732 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733 }
1734 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001735 return PyUnicode_Find(
1736 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001737 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001738 /* XXX - the "expected a character buffer object" is pretty
1739 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 return -2;
1741
Thomas Wouters477c8d52006-05-27 19:21:47 +00001742 if (dir > 0)
1743 return stringlib_find_slice(
1744 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1745 sub, sub_len, start, end);
1746 else
1747 return stringlib_rfind_slice(
1748 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1749 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750}
1751
1752
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001753PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754"S.find(sub [,start [,end]]) -> int\n\
1755\n\
1756Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001757such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758arguments start and end are interpreted as in slice notation.\n\
1759\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001760Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761
1762static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001763string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001765 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766 if (result == -2)
1767 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001768 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001769}
1770
1771
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001772PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773"S.index(sub [,start [,end]]) -> int\n\
1774\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776
1777static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001778string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001780 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781 if (result == -2)
1782 return NULL;
1783 if (result == -1) {
1784 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001785 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786 return NULL;
1787 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001788 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789}
1790
1791
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001792PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793"S.rfind(sub [,start [,end]]) -> int\n\
1794\n\
1795Return the highest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001796such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797arguments start and end are interpreted as in slice notation.\n\
1798\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001799Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800
1801static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001802string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001804 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805 if (result == -2)
1806 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001807 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808}
1809
1810
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001811PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812"S.rindex(sub [,start [,end]]) -> int\n\
1813\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001814Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815
1816static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001817string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001819 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820 if (result == -2)
1821 return NULL;
1822 if (result == -1) {
1823 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001824 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825 return NULL;
1826 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001827 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828}
1829
1830
Thomas Wouters477c8d52006-05-27 19:21:47 +00001831Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001832do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1833{
1834 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001835 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001836 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001837 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1838 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001839
1840 i = 0;
1841 if (striptype != RIGHTSTRIP) {
1842 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1843 i++;
1844 }
1845 }
1846
1847 j = len;
1848 if (striptype != LEFTSTRIP) {
1849 do {
1850 j--;
1851 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1852 j++;
1853 }
1854
1855 if (i == 0 && j == len && PyString_CheckExact(self)) {
1856 Py_INCREF(self);
1857 return (PyObject*)self;
1858 }
1859 else
1860 return PyString_FromStringAndSize(s+i, j-i);
1861}
1862
1863
Thomas Wouters477c8d52006-05-27 19:21:47 +00001864Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001865do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866{
1867 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001868 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001869
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001870 i = 0;
1871 if (striptype != RIGHTSTRIP) {
1872 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1873 i++;
1874 }
1875 }
1876
1877 j = len;
1878 if (striptype != LEFTSTRIP) {
1879 do {
1880 j--;
1881 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1882 j++;
1883 }
1884
Tim Peters8fa5dd02001-09-12 02:18:30 +00001885 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886 Py_INCREF(self);
1887 return (PyObject*)self;
1888 }
1889 else
1890 return PyString_FromStringAndSize(s+i, j-i);
1891}
1892
1893
Thomas Wouters477c8d52006-05-27 19:21:47 +00001894Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001895do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1896{
1897 PyObject *sep = NULL;
1898
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001899 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001900 return NULL;
1901
1902 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001903 if (PyString_Check(sep))
1904 return do_xstrip(self, striptype, sep);
1905 else if (PyUnicode_Check(sep)) {
1906 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1907 PyObject *res;
1908 if (uniself==NULL)
1909 return NULL;
1910 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1911 striptype, sep);
1912 Py_DECREF(uniself);
1913 return res;
1914 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001915 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001916 "%s arg must be None or string",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001917 STRIPNAME(striptype));
1918 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001919 }
1920
1921 return do_strip(self, striptype);
1922}
1923
1924
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001925PyDoc_STRVAR(strip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001926"S.strip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927\n\
1928Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001929whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001930If chars is given and not None, remove characters in chars instead.\n\
1931If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932
1933static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001934string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001936 if (PyTuple_GET_SIZE(args) == 0)
1937 return do_strip(self, BOTHSTRIP); /* Common case */
1938 else
1939 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940}
1941
1942
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001943PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001944"S.lstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001946Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001947If chars is given and not None, remove characters in chars instead.\n\
1948If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949
1950static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001951string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001953 if (PyTuple_GET_SIZE(args) == 0)
1954 return do_strip(self, LEFTSTRIP); /* Common case */
1955 else
1956 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957}
1958
1959
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001960PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001961"S.rstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001963Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001964If chars is given and not None, remove characters in chars instead.\n\
1965If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966
1967static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001968string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001970 if (PyTuple_GET_SIZE(args) == 0)
1971 return do_strip(self, RIGHTSTRIP); /* Common case */
1972 else
1973 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974}
1975
1976
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001977PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978"S.lower() -> string\n\
1979\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001980Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981
Thomas Wouters477c8d52006-05-27 19:21:47 +00001982/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1983#ifndef _tolower
1984#define _tolower tolower
1985#endif
1986
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001988string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001990 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001991 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001992 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001994 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001995 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001997
1998 s = PyString_AS_STRING(newobj);
1999
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002000 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002001
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002003 int c = Py_CHARMASK(s[i]);
2004 if (isupper(c))
2005 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002007
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002008 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009}
2010
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002011PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012"S.upper() -> string\n\
2013\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002014Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015
Thomas Wouters477c8d52006-05-27 19:21:47 +00002016#ifndef _toupper
2017#define _toupper toupper
2018#endif
2019
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002021string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002023 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002024 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002025 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002027 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002028 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002030
2031 s = PyString_AS_STRING(newobj);
2032
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002033 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002034
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002036 int c = Py_CHARMASK(s[i]);
2037 if (islower(c))
2038 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002040
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002041 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042}
2043
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002044PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002045"S.title() -> string\n\
2046\n\
2047Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002048characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002049
2050static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002051string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052{
2053 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002054 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002055 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002056 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002057
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002058 newobj = PyString_FromStringAndSize(NULL, n);
2059 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002061 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002062 for (i = 0; i < n; i++) {
2063 int c = Py_CHARMASK(*s++);
2064 if (islower(c)) {
2065 if (!previous_is_cased)
2066 c = toupper(c);
2067 previous_is_cased = 1;
2068 } else if (isupper(c)) {
2069 if (previous_is_cased)
2070 c = tolower(c);
2071 previous_is_cased = 1;
2072 } else
2073 previous_is_cased = 0;
2074 *s_new++ = c;
2075 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002076 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002077}
2078
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002079PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080"S.capitalize() -> string\n\
2081\n\
2082Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002083capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084
2085static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002086string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002087{
2088 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002089 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002090 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002092 newobj = PyString_FromStringAndSize(NULL, n);
2093 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002095 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096 if (0 < n) {
2097 int c = Py_CHARMASK(*s++);
2098 if (islower(c))
2099 *s_new = toupper(c);
2100 else
2101 *s_new = c;
2102 s_new++;
2103 }
2104 for (i = 1; i < n; i++) {
2105 int c = Py_CHARMASK(*s++);
2106 if (isupper(c))
2107 *s_new = tolower(c);
2108 else
2109 *s_new = c;
2110 s_new++;
2111 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002112 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113}
2114
2115
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002116PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117"S.count(sub[, start[, end]]) -> int\n\
2118\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002119Return the number of non-overlapping occurrences of substring sub in\n\
2120string S[start:end]. Optional arguments start and end are interpreted\n\
2121as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002122
2123static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002124string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002126 PyObject *sub_obj;
2127 const char *str = PyString_AS_STRING(self), *sub;
2128 Py_ssize_t sub_len;
2129 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130
Thomas Wouters477c8d52006-05-27 19:21:47 +00002131 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2132 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002134
Thomas Wouters477c8d52006-05-27 19:21:47 +00002135 if (PyString_Check(sub_obj)) {
2136 sub = PyString_AS_STRING(sub_obj);
2137 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002139 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002140 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002141 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002142 if (count == -1)
2143 return NULL;
2144 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002145 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002146 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002147 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002148 return NULL;
2149
Thomas Wouters477c8d52006-05-27 19:21:47 +00002150 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002151
Thomas Wouters477c8d52006-05-27 19:21:47 +00002152 return PyInt_FromSsize_t(
2153 stringlib_count(str + start, end - start, sub, sub_len)
2154 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155}
2156
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002157PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158"S.swapcase() -> string\n\
2159\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002160Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002161converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002162
2163static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002164string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165{
2166 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002167 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002168 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002170 newobj = PyString_FromStringAndSize(NULL, n);
2171 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002173 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174 for (i = 0; i < n; i++) {
2175 int c = Py_CHARMASK(*s++);
2176 if (islower(c)) {
2177 *s_new = toupper(c);
2178 }
2179 else if (isupper(c)) {
2180 *s_new = tolower(c);
2181 }
2182 else
2183 *s_new = c;
2184 s_new++;
2185 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002186 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187}
2188
2189
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002190PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191"S.translate(table [,deletechars]) -> string\n\
2192\n\
2193Return a copy of the string S, where all characters occurring\n\
2194in the optional argument deletechars are removed, and the\n\
2195remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002196translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197
2198static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002199string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002200{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002202 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002203 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002205 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002206 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 PyObject *result;
2208 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002209 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002211 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002212 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002214
2215 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002216 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002217 tablen = PyString_GET_SIZE(tableobj);
2218 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002219 else if (tableobj == Py_None) {
2220 table = NULL;
2221 tablen = 256;
2222 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002223 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002224 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002225 parameter; instead a mapping to None will cause characters
2226 to be deleted. */
2227 if (delobj != NULL) {
2228 PyErr_SetString(PyExc_TypeError,
2229 "deletions are implemented differently for unicode");
2230 return NULL;
2231 }
2232 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2233 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002234 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002235 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236
Martin v. Löwis00b61272002-12-12 20:03:19 +00002237 if (tablen != 256) {
2238 PyErr_SetString(PyExc_ValueError,
2239 "translation table must be 256 characters long");
2240 return NULL;
2241 }
2242
Guido van Rossum4c08d552000-03-10 22:55:18 +00002243 if (delobj != NULL) {
2244 if (PyString_Check(delobj)) {
2245 del_table = PyString_AS_STRING(delobj);
2246 dellen = PyString_GET_SIZE(delobj);
2247 }
2248 else if (PyUnicode_Check(delobj)) {
2249 PyErr_SetString(PyExc_TypeError,
2250 "deletions are implemented differently for unicode");
2251 return NULL;
2252 }
2253 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2254 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002255 }
2256 else {
2257 del_table = NULL;
2258 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259 }
2260
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002261 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262 result = PyString_FromStringAndSize((char *)NULL, inlen);
2263 if (result == NULL)
2264 return NULL;
2265 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002266 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267
Guido van Rossumd8faa362007-04-27 19:54:29 +00002268 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002269 /* If no deletions are required, use faster code */
2270 for (i = inlen; --i >= 0; ) {
2271 c = Py_CHARMASK(*input++);
2272 if (Py_CHARMASK((*output++ = table[c])) != c)
2273 changed = 1;
2274 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002275 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002276 return result;
2277 Py_DECREF(result);
2278 Py_INCREF(input_obj);
2279 return input_obj;
2280 }
2281
Guido van Rossumd8faa362007-04-27 19:54:29 +00002282 if (table == NULL) {
2283 for (i = 0; i < 256; i++)
2284 trans_table[i] = Py_CHARMASK(i);
2285 } else {
2286 for (i = 0; i < 256; i++)
2287 trans_table[i] = Py_CHARMASK(table[i]);
2288 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002289
2290 for (i = 0; i < dellen; i++)
2291 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2292
2293 for (i = inlen; --i >= 0; ) {
2294 c = Py_CHARMASK(*input++);
2295 if (trans_table[c] != -1)
2296 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2297 continue;
2298 changed = 1;
2299 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002300 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301 Py_DECREF(result);
2302 Py_INCREF(input_obj);
2303 return input_obj;
2304 }
2305 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002306 if (inlen > 0)
2307 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308 return result;
2309}
2310
2311
Thomas Wouters477c8d52006-05-27 19:21:47 +00002312#define FORWARD 1
2313#define REVERSE -1
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314
Thomas Wouters477c8d52006-05-27 19:21:47 +00002315/* find and count characters and substrings */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316
Thomas Wouters477c8d52006-05-27 19:21:47 +00002317#define findchar(target, target_len, c) \
2318 ((char *)memchr((const void *)(target), c, target_len))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319
Thomas Wouters477c8d52006-05-27 19:21:47 +00002320/* String ops must return a string. */
2321/* If the object is subclass of string, create a copy */
2322Py_LOCAL(PyStringObject *)
2323return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002325 if (PyString_CheckExact(self)) {
2326 Py_INCREF(self);
2327 return self;
2328 }
2329 return (PyStringObject *)PyString_FromStringAndSize(
2330 PyString_AS_STRING(self),
2331 PyString_GET_SIZE(self));
2332}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333
Thomas Wouters477c8d52006-05-27 19:21:47 +00002334Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002335countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002336{
2337 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002338 const char *start=target;
2339 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340
Thomas Wouters477c8d52006-05-27 19:21:47 +00002341 while ( (start=findchar(start, end-start, c)) != NULL ) {
2342 count++;
2343 if (count >= maxcount)
2344 break;
2345 start += 1;
2346 }
2347 return count;
2348}
2349
2350Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002351findstring(const char *target, Py_ssize_t target_len,
2352 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002353 Py_ssize_t start,
2354 Py_ssize_t end,
2355 int direction)
2356{
2357 if (start < 0) {
2358 start += target_len;
2359 if (start < 0)
2360 start = 0;
2361 }
2362 if (end > target_len) {
2363 end = target_len;
2364 } else if (end < 0) {
2365 end += target_len;
2366 if (end < 0)
2367 end = 0;
2368 }
2369
2370 /* zero-length substrings always match at the first attempt */
2371 if (pattern_len == 0)
2372 return (direction > 0) ? start : end;
2373
2374 end -= pattern_len;
2375
2376 if (direction < 0) {
2377 for (; end >= start; end--)
2378 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2379 return end;
2380 } else {
2381 for (; start <= end; start++)
2382 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2383 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002384 }
2385 return -1;
2386}
2387
Thomas Wouters477c8d52006-05-27 19:21:47 +00002388Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002389countstring(const char *target, Py_ssize_t target_len,
2390 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002391 Py_ssize_t start,
2392 Py_ssize_t end,
2393 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002395 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002396
Thomas Wouters477c8d52006-05-27 19:21:47 +00002397 if (start < 0) {
2398 start += target_len;
2399 if (start < 0)
2400 start = 0;
2401 }
2402 if (end > target_len) {
2403 end = target_len;
2404 } else if (end < 0) {
2405 end += target_len;
2406 if (end < 0)
2407 end = 0;
2408 }
2409
2410 /* zero-length substrings match everywhere */
2411 if (pattern_len == 0 || maxcount == 0) {
2412 if (target_len+1 < maxcount)
2413 return target_len+1;
2414 return maxcount;
2415 }
2416
2417 end -= pattern_len;
2418 if (direction < 0) {
2419 for (; (end >= start); end--)
2420 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2421 count++;
2422 if (--maxcount <= 0) break;
2423 end -= pattern_len-1;
2424 }
2425 } else {
2426 for (; (start <= end); start++)
2427 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2428 count++;
2429 if (--maxcount <= 0)
2430 break;
2431 start += pattern_len-1;
2432 }
2433 }
2434 return count;
2435}
2436
2437
2438/* Algorithms for different cases of string replacement */
2439
2440/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2441Py_LOCAL(PyStringObject *)
2442replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002443 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002444 Py_ssize_t maxcount)
2445{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002446 char *self_s, *result_s;
2447 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002448 Py_ssize_t count, i, product;
2449 PyStringObject *result;
2450
2451 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002452
Thomas Wouters477c8d52006-05-27 19:21:47 +00002453 /* 1 at the end plus 1 after every character */
2454 count = self_len+1;
2455 if (maxcount < count)
2456 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002457
Thomas Wouters477c8d52006-05-27 19:21:47 +00002458 /* Check for overflow */
2459 /* result_len = count * to_len + self_len; */
2460 product = count * to_len;
2461 if (product / to_len != count) {
2462 PyErr_SetString(PyExc_OverflowError,
2463 "replace string is too long");
2464 return NULL;
2465 }
2466 result_len = product + self_len;
2467 if (result_len < 0) {
2468 PyErr_SetString(PyExc_OverflowError,
2469 "replace string is too long");
2470 return NULL;
2471 }
2472
2473 if (! (result = (PyStringObject *)
2474 PyString_FromStringAndSize(NULL, result_len)) )
2475 return NULL;
2476
2477 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002478 result_s = PyString_AS_STRING(result);
2479
2480 /* TODO: special case single character, which doesn't need memcpy */
2481
2482 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002483 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002484 result_s += to_len;
2485 count -= 1;
2486
2487 for (i=0; i<count; i++) {
2488 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002489 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002490 result_s += to_len;
2491 }
2492
2493 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002494 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002495
2496 return result;
2497}
2498
2499/* Special case for deleting a single character */
2500/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2501Py_LOCAL(PyStringObject *)
2502replace_delete_single_character(PyStringObject *self,
2503 char from_c, Py_ssize_t maxcount)
2504{
2505 char *self_s, *result_s;
2506 char *start, *next, *end;
2507 Py_ssize_t self_len, result_len;
2508 Py_ssize_t count;
2509 PyStringObject *result;
2510
2511 self_len = PyString_GET_SIZE(self);
2512 self_s = PyString_AS_STRING(self);
2513
2514 count = countchar(self_s, self_len, from_c, maxcount);
2515 if (count == 0) {
2516 return return_self(self);
2517 }
2518
2519 result_len = self_len - count; /* from_len == 1 */
2520 assert(result_len>=0);
2521
2522 if ( (result = (PyStringObject *)
2523 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2524 return NULL;
2525 result_s = PyString_AS_STRING(result);
2526
2527 start = self_s;
2528 end = self_s + self_len;
2529 while (count-- > 0) {
2530 next = findchar(start, end-start, from_c);
2531 if (next == NULL)
2532 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002533 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002534 result_s += (next-start);
2535 start = next+1;
2536 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002537 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002538
Thomas Wouters477c8d52006-05-27 19:21:47 +00002539 return result;
2540}
2541
2542/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2543
2544Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002545replace_delete_substring(PyStringObject *self,
2546 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002547 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002548 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002549 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002550 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002551 Py_ssize_t count, offset;
2552 PyStringObject *result;
2553
2554 self_len = PyString_GET_SIZE(self);
2555 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002556
2557 count = countstring(self_s, self_len,
2558 from_s, from_len,
2559 0, self_len, 1,
2560 maxcount);
2561
2562 if (count == 0) {
2563 /* no matches */
2564 return return_self(self);
2565 }
2566
2567 result_len = self_len - (count * from_len);
2568 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002569
Thomas Wouters477c8d52006-05-27 19:21:47 +00002570 if ( (result = (PyStringObject *)
2571 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2572 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002573
Thomas Wouters477c8d52006-05-27 19:21:47 +00002574 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002575
Thomas Wouters477c8d52006-05-27 19:21:47 +00002576 start = self_s;
2577 end = self_s + self_len;
2578 while (count-- > 0) {
2579 offset = findstring(start, end-start,
2580 from_s, from_len,
2581 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002582 if (offset == -1)
2583 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002584 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002585
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002586 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002587
Thomas Wouters477c8d52006-05-27 19:21:47 +00002588 result_s += (next-start);
2589 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002590 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002591 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002592 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002593}
2594
Thomas Wouters477c8d52006-05-27 19:21:47 +00002595/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2596Py_LOCAL(PyStringObject *)
2597replace_single_character_in_place(PyStringObject *self,
2598 char from_c, char to_c,
2599 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002600{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002601 char *self_s, *result_s, *start, *end, *next;
2602 Py_ssize_t self_len;
2603 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002604
Thomas Wouters477c8d52006-05-27 19:21:47 +00002605 /* The result string will be the same size */
2606 self_s = PyString_AS_STRING(self);
2607 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002608
Thomas Wouters477c8d52006-05-27 19:21:47 +00002609 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002610
Thomas Wouters477c8d52006-05-27 19:21:47 +00002611 if (next == NULL) {
2612 /* No matches; return the original string */
2613 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002614 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002615
Thomas Wouters477c8d52006-05-27 19:21:47 +00002616 /* Need to make a new string */
2617 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2618 if (result == NULL)
2619 return NULL;
2620 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002621 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002622
Thomas Wouters477c8d52006-05-27 19:21:47 +00002623 /* change everything in-place, starting with this one */
2624 start = result_s + (next-self_s);
2625 *start = to_c;
2626 start++;
2627 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002628
Thomas Wouters477c8d52006-05-27 19:21:47 +00002629 while (--maxcount > 0) {
2630 next = findchar(start, end-start, from_c);
2631 if (next == NULL)
2632 break;
2633 *next = to_c;
2634 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002635 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002636
Thomas Wouters477c8d52006-05-27 19:21:47 +00002637 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002638}
2639
Thomas Wouters477c8d52006-05-27 19:21:47 +00002640/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2641Py_LOCAL(PyStringObject *)
2642replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002643 const char *from_s, Py_ssize_t from_len,
2644 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002645 Py_ssize_t maxcount)
2646{
2647 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002648 char *self_s;
2649 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002650 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002651
Thomas Wouters477c8d52006-05-27 19:21:47 +00002652 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002653
Thomas Wouters477c8d52006-05-27 19:21:47 +00002654 self_s = PyString_AS_STRING(self);
2655 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002656
Thomas Wouters477c8d52006-05-27 19:21:47 +00002657 offset = findstring(self_s, self_len,
2658 from_s, from_len,
2659 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002660 if (offset == -1) {
2661 /* No matches; return the original string */
2662 return return_self(self);
2663 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002664
Thomas Wouters477c8d52006-05-27 19:21:47 +00002665 /* Need to make a new string */
2666 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2667 if (result == NULL)
2668 return NULL;
2669 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002670 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002671
Thomas Wouters477c8d52006-05-27 19:21:47 +00002672 /* change everything in-place, starting with this one */
2673 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002674 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002675 start += from_len;
2676 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002677
Thomas Wouters477c8d52006-05-27 19:21:47 +00002678 while ( --maxcount > 0) {
2679 offset = findstring(start, end-start,
2680 from_s, from_len,
2681 0, end-start, FORWARD);
2682 if (offset==-1)
2683 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002684 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002685 start += offset+from_len;
2686 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002687
Thomas Wouters477c8d52006-05-27 19:21:47 +00002688 return result;
2689}
2690
2691/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2692Py_LOCAL(PyStringObject *)
2693replace_single_character(PyStringObject *self,
2694 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002695 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002696 Py_ssize_t maxcount)
2697{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002698 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002699 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002700 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002701 Py_ssize_t count, product;
2702 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002703
Thomas Wouters477c8d52006-05-27 19:21:47 +00002704 self_s = PyString_AS_STRING(self);
2705 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002706
Thomas Wouters477c8d52006-05-27 19:21:47 +00002707 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002708 if (count == 0) {
2709 /* no matches, return unchanged */
2710 return return_self(self);
2711 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002712
Thomas Wouters477c8d52006-05-27 19:21:47 +00002713 /* use the difference between current and new, hence the "-1" */
2714 /* result_len = self_len + count * (to_len-1) */
2715 product = count * (to_len-1);
2716 if (product / (to_len-1) != count) {
2717 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2718 return NULL;
2719 }
2720 result_len = self_len + product;
2721 if (result_len < 0) {
2722 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2723 return NULL;
2724 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002725
Thomas Wouters477c8d52006-05-27 19:21:47 +00002726 if ( (result = (PyStringObject *)
2727 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2728 return NULL;
2729 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002730
Thomas Wouters477c8d52006-05-27 19:21:47 +00002731 start = self_s;
2732 end = self_s + self_len;
2733 while (count-- > 0) {
2734 next = findchar(start, end-start, from_c);
2735 if (next == NULL)
2736 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002737
Thomas Wouters477c8d52006-05-27 19:21:47 +00002738 if (next == start) {
2739 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002740 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002741 result_s += to_len;
2742 start += 1;
2743 } else {
2744 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002745 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002746 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002747 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002748 result_s += to_len;
2749 start = next+1;
2750 }
2751 }
2752 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002753 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002754
Thomas Wouters477c8d52006-05-27 19:21:47 +00002755 return result;
2756}
2757
2758/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2759Py_LOCAL(PyStringObject *)
2760replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002761 const char *from_s, Py_ssize_t from_len,
2762 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002763 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002764 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002765 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002766 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002767 Py_ssize_t count, offset, product;
2768 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002769
Thomas Wouters477c8d52006-05-27 19:21:47 +00002770 self_s = PyString_AS_STRING(self);
2771 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002772
Thomas Wouters477c8d52006-05-27 19:21:47 +00002773 count = countstring(self_s, self_len,
2774 from_s, from_len,
2775 0, self_len, FORWARD, maxcount);
2776 if (count == 0) {
2777 /* no matches, return unchanged */
2778 return return_self(self);
2779 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002780
Thomas Wouters477c8d52006-05-27 19:21:47 +00002781 /* Check for overflow */
2782 /* result_len = self_len + count * (to_len-from_len) */
2783 product = count * (to_len-from_len);
2784 if (product / (to_len-from_len) != count) {
2785 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2786 return NULL;
2787 }
2788 result_len = self_len + product;
2789 if (result_len < 0) {
2790 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2791 return NULL;
2792 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002793
Thomas Wouters477c8d52006-05-27 19:21:47 +00002794 if ( (result = (PyStringObject *)
2795 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2796 return NULL;
2797 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002798
Thomas Wouters477c8d52006-05-27 19:21:47 +00002799 start = self_s;
2800 end = self_s + self_len;
2801 while (count-- > 0) {
2802 offset = findstring(start, end-start,
2803 from_s, from_len,
2804 0, end-start, FORWARD);
2805 if (offset == -1)
2806 break;
2807 next = start+offset;
2808 if (next == start) {
2809 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002810 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002811 result_s += to_len;
2812 start += from_len;
2813 } else {
2814 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002815 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002816 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002817 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002818 result_s += to_len;
2819 start = next+from_len;
2820 }
2821 }
2822 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002823 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002824
Thomas Wouters477c8d52006-05-27 19:21:47 +00002825 return result;
2826}
2827
2828
2829Py_LOCAL(PyStringObject *)
2830replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002831 const char *from_s, Py_ssize_t from_len,
2832 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002833 Py_ssize_t maxcount)
2834{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002835 if (maxcount < 0) {
2836 maxcount = PY_SSIZE_T_MAX;
2837 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2838 /* nothing to do; return the original string */
2839 return return_self(self);
2840 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002841
Thomas Wouters477c8d52006-05-27 19:21:47 +00002842 if (maxcount == 0 ||
2843 (from_len == 0 && to_len == 0)) {
2844 /* nothing to do; return the original string */
2845 return return_self(self);
2846 }
2847
2848 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002849
Thomas Wouters477c8d52006-05-27 19:21:47 +00002850 if (from_len == 0) {
2851 /* insert the 'to' string everywhere. */
2852 /* >>> "Python".replace("", ".") */
2853 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002854 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002855 }
2856
2857 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2858 /* point for an empty self string to generate a non-empty string */
2859 /* Special case so the remaining code always gets a non-empty string */
2860 if (PyString_GET_SIZE(self) == 0) {
2861 return return_self(self);
2862 }
2863
2864 if (to_len == 0) {
2865 /* delete all occurances of 'from' string */
2866 if (from_len == 1) {
2867 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002868 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002869 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002870 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002871 }
2872 }
2873
2874 /* Handle special case where both strings have the same length */
2875
2876 if (from_len == to_len) {
2877 if (from_len == 1) {
2878 return replace_single_character_in_place(
2879 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002880 from_s[0],
2881 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002882 maxcount);
2883 } else {
2884 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002885 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002886 }
2887 }
2888
2889 /* Otherwise use the more generic algorithms */
2890 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002891 return replace_single_character(self, from_s[0],
2892 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002893 } else {
2894 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002895 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002896 }
2897}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002898
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002899PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002900"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002901\n\
2902Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002903old replaced by new. If the optional argument count is\n\
2904given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002905
2906static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002907string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002908{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002909 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002910 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002911 const char *from_s, *to_s;
2912 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002913
Thomas Wouters477c8d52006-05-27 19:21:47 +00002914 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002915 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002916
Thomas Wouters477c8d52006-05-27 19:21:47 +00002917 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002918 from_s = PyString_AS_STRING(from);
2919 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002920 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002921 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002922 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002923 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002924 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002925 return NULL;
2926
Thomas Wouters477c8d52006-05-27 19:21:47 +00002927 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002928 to_s = PyString_AS_STRING(to);
2929 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002930 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002931 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002932 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002933 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002934 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002935 return NULL;
2936
Thomas Wouters477c8d52006-05-27 19:21:47 +00002937 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002938 from_s, from_len,
2939 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002940}
2941
Thomas Wouters477c8d52006-05-27 19:21:47 +00002942/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002943
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002944/* Matches the end (direction >= 0) or start (direction < 0) of self
2945 * against substr, using the start and end arguments. Returns
2946 * -1 on error, 0 if not found and 1 if found.
2947 */
2948Py_LOCAL(int)
2949_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2950 Py_ssize_t end, int direction)
2951{
2952 Py_ssize_t len = PyString_GET_SIZE(self);
2953 Py_ssize_t slen;
2954 const char* sub;
2955 const char* str;
2956
2957 if (PyString_Check(substr)) {
2958 sub = PyString_AS_STRING(substr);
2959 slen = PyString_GET_SIZE(substr);
2960 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002961 else if (PyUnicode_Check(substr))
2962 return PyUnicode_Tailmatch((PyObject *)self,
2963 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002964 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2965 return -1;
2966 str = PyString_AS_STRING(self);
2967
2968 string_adjust_indices(&start, &end, len);
2969
2970 if (direction < 0) {
2971 /* startswith */
2972 if (start+slen > len)
2973 return 0;
2974 } else {
2975 /* endswith */
2976 if (end-start < slen || start > len)
2977 return 0;
2978
2979 if (end-slen > start)
2980 start = end - slen;
2981 }
2982 if (end-start >= slen)
2983 return ! memcmp(str+start, sub, slen);
2984 return 0;
2985}
2986
2987
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002988PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002989"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002990\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002991Return True if S starts with the specified prefix, False otherwise.\n\
2992With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002993With optional end, stop comparing S at that position.\n\
2994prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002995
2996static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002997string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002998{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002999 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003000 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003001 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003002 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003003
Guido van Rossumc6821402000-05-08 14:08:05 +00003004 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3005 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003006 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003007 if (PyTuple_Check(subobj)) {
3008 Py_ssize_t i;
3009 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3010 result = _string_tailmatch(self,
3011 PyTuple_GET_ITEM(subobj, i),
3012 start, end, -1);
3013 if (result == -1)
3014 return NULL;
3015 else if (result) {
3016 Py_RETURN_TRUE;
3017 }
3018 }
3019 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003020 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003021 result = _string_tailmatch(self, subobj, start, end, -1);
3022 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003023 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003024 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003025 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003026}
3027
3028
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003029PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003030"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003031\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003032Return True if S ends with the specified suffix, False otherwise.\n\
3033With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003034With optional end, stop comparing S at that position.\n\
3035suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003036
3037static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003038string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003039{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003040 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003041 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003042 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003043 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003044
Guido van Rossumc6821402000-05-08 14:08:05 +00003045 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3046 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003047 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003048 if (PyTuple_Check(subobj)) {
3049 Py_ssize_t i;
3050 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3051 result = _string_tailmatch(self,
3052 PyTuple_GET_ITEM(subobj, i),
3053 start, end, +1);
3054 if (result == -1)
3055 return NULL;
3056 else if (result) {
3057 Py_RETURN_TRUE;
3058 }
3059 }
3060 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003061 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003062 result = _string_tailmatch(self, subobj, start, end, +1);
3063 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003064 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003065 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003066 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003067}
3068
3069
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003070PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003071"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003072\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003073Encodes S using the codec registered for encoding. encoding defaults\n\
3074to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003075handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003076a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3077'xmlcharrefreplace' as well as any other name registered with\n\
3078codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003079
3080static PyObject *
3081string_encode(PyStringObject *self, PyObject *args)
3082{
3083 char *encoding = NULL;
3084 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003085 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003086
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003087 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3088 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003089 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003090 if (v == NULL)
3091 goto onError;
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003092 if (!PyBytes_Check(v)) {
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003093 PyErr_Format(PyExc_TypeError,
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003094 "[str8] encoder did not return a bytes object "
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003095 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003096 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003097 Py_DECREF(v);
3098 return NULL;
3099 }
3100 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003101
3102 onError:
3103 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003104}
3105
3106
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003107PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003108"S.decode([encoding[,errors]]) -> object\n\
3109\n\
3110Decodes S using the codec registered for encoding. encoding defaults\n\
3111to the default encoding. errors may be given to set a different error\n\
3112handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003113a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3114as well as any other name registerd with codecs.register_error that is\n\
3115able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003116
3117static PyObject *
3118string_decode(PyStringObject *self, PyObject *args)
3119{
3120 char *encoding = NULL;
3121 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003122 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003123
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003124 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3125 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003126 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003127 if (v == NULL)
3128 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003129 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3130 PyErr_Format(PyExc_TypeError,
3131 "decoder did not return a string/unicode object "
3132 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003133 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003134 Py_DECREF(v);
3135 return NULL;
3136 }
3137 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003138
3139 onError:
3140 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003141}
3142
3143
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003144PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145"S.expandtabs([tabsize]) -> string\n\
3146\n\
3147Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003148If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003149
3150static PyObject*
3151string_expandtabs(PyStringObject *self, PyObject *args)
3152{
3153 const char *e, *p;
3154 char *q;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003155 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003156 PyObject *u;
3157 int tabsize = 8;
3158
3159 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3160 return NULL;
3161
Thomas Wouters7e474022000-07-16 12:04:32 +00003162 /* First pass: determine size of output string */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003163 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003164 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3165 for (p = PyString_AS_STRING(self); p < e; p++)
3166 if (*p == '\t') {
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003167 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003168 j += tabsize - (j % tabsize);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003169 if (old_j > j) {
3170 PyErr_SetString(PyExc_OverflowError,
3171 "new string is too long");
3172 return NULL;
3173 }
3174 old_j = j;
3175 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176 }
3177 else {
3178 j++;
3179 if (*p == '\n' || *p == '\r') {
3180 i += j;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003181 old_j = j = 0;
3182 if (i < 0) {
3183 PyErr_SetString(PyExc_OverflowError,
3184 "new string is too long");
3185 return NULL;
3186 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003187 }
3188 }
3189
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003190 if ((i + j) < 0) {
3191 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3192 return NULL;
3193 }
3194
Guido van Rossum4c08d552000-03-10 22:55:18 +00003195 /* Second pass: create output string and fill it */
3196 u = PyString_FromStringAndSize(NULL, i + j);
3197 if (!u)
3198 return NULL;
3199
3200 j = 0;
3201 q = PyString_AS_STRING(u);
3202
3203 for (p = PyString_AS_STRING(self); p < e; p++)
3204 if (*p == '\t') {
3205 if (tabsize > 0) {
3206 i = tabsize - (j % tabsize);
3207 j += i;
3208 while (i--)
3209 *q++ = ' ';
3210 }
3211 }
3212 else {
3213 j++;
3214 *q++ = *p;
3215 if (*p == '\n' || *p == '\r')
3216 j = 0;
3217 }
3218
3219 return u;
3220}
3221
Thomas Wouters477c8d52006-05-27 19:21:47 +00003222Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003223pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003224{
3225 PyObject *u;
3226
3227 if (left < 0)
3228 left = 0;
3229 if (right < 0)
3230 right = 0;
3231
Tim Peters8fa5dd02001-09-12 02:18:30 +00003232 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003233 Py_INCREF(self);
3234 return (PyObject *)self;
3235 }
3236
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003237 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003238 left + PyString_GET_SIZE(self) + right);
3239 if (u) {
3240 if (left)
3241 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003242 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003243 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003244 PyString_GET_SIZE(self));
3245 if (right)
3246 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3247 fill, right);
3248 }
3249
3250 return u;
3251}
3252
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003253PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003254"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003255"\n"
3256"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003257"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003258
3259static PyObject *
3260string_ljust(PyStringObject *self, PyObject *args)
3261{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003262 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003263 char fillchar = ' ';
3264
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003265 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003266 return NULL;
3267
Tim Peters8fa5dd02001-09-12 02:18:30 +00003268 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003269 Py_INCREF(self);
3270 return (PyObject*) self;
3271 }
3272
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003273 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003274}
3275
3276
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003277PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003278"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003279"\n"
3280"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003281"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003282
3283static PyObject *
3284string_rjust(PyStringObject *self, PyObject *args)
3285{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003286 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003287 char fillchar = ' ';
3288
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003289 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003290 return NULL;
3291
Tim Peters8fa5dd02001-09-12 02:18:30 +00003292 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003293 Py_INCREF(self);
3294 return (PyObject*) self;
3295 }
3296
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003297 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003298}
3299
3300
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003301PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003302"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003303"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003304"Return S centered in a string of length width. Padding is\n"
3305"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003306
3307static PyObject *
3308string_center(PyStringObject *self, PyObject *args)
3309{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003310 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003311 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003312 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003313
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003314 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003315 return NULL;
3316
Tim Peters8fa5dd02001-09-12 02:18:30 +00003317 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003318 Py_INCREF(self);
3319 return (PyObject*) self;
3320 }
3321
3322 marg = width - PyString_GET_SIZE(self);
3323 left = marg / 2 + (marg & width & 1);
3324
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003325 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003326}
3327
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003328PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003329"S.zfill(width) -> string\n"
3330"\n"
3331"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003332"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003333
3334static PyObject *
3335string_zfill(PyStringObject *self, PyObject *args)
3336{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003337 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003338 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003339 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003340 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003341
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003342 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003343 return NULL;
3344
3345 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003346 if (PyString_CheckExact(self)) {
3347 Py_INCREF(self);
3348 return (PyObject*) self;
3349 }
3350 else
3351 return PyString_FromStringAndSize(
3352 PyString_AS_STRING(self),
3353 PyString_GET_SIZE(self)
3354 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003355 }
3356
3357 fill = width - PyString_GET_SIZE(self);
3358
3359 s = pad(self, fill, 0, '0');
3360
3361 if (s == NULL)
3362 return NULL;
3363
3364 p = PyString_AS_STRING(s);
3365 if (p[fill] == '+' || p[fill] == '-') {
3366 /* move sign to beginning of string */
3367 p[0] = p[fill];
3368 p[fill] = '0';
3369 }
3370
3371 return (PyObject*) s;
3372}
3373
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003374PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003375"S.isspace() -> bool\n\
3376\n\
3377Return True if all characters in S are whitespace\n\
3378and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003379
3380static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003381string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003382{
Fred Drakeba096332000-07-09 07:04:36 +00003383 register const unsigned char *p
3384 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003385 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003386
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387 /* Shortcut for single character strings */
3388 if (PyString_GET_SIZE(self) == 1 &&
3389 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003390 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003391
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003392 /* Special case for empty strings */
3393 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003394 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003395
Guido van Rossum4c08d552000-03-10 22:55:18 +00003396 e = p + PyString_GET_SIZE(self);
3397 for (; p < e; p++) {
3398 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003399 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003400 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003401 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003402}
3403
3404
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003405PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003406"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003407\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003408Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003409and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003410
3411static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003412string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003413{
Fred Drakeba096332000-07-09 07:04:36 +00003414 register const unsigned char *p
3415 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003416 register const unsigned char *e;
3417
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003418 /* Shortcut for single character strings */
3419 if (PyString_GET_SIZE(self) == 1 &&
3420 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003421 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003422
3423 /* Special case for empty strings */
3424 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003425 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003426
3427 e = p + PyString_GET_SIZE(self);
3428 for (; p < e; p++) {
3429 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003430 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003431 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003432 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003433}
3434
3435
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003436PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003437"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003438\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003439Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003440and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003441
3442static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003443string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003444{
Fred Drakeba096332000-07-09 07:04:36 +00003445 register const unsigned char *p
3446 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003447 register const unsigned char *e;
3448
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003449 /* Shortcut for single character strings */
3450 if (PyString_GET_SIZE(self) == 1 &&
3451 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003452 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003453
3454 /* Special case for empty strings */
3455 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003456 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003457
3458 e = p + PyString_GET_SIZE(self);
3459 for (; p < e; p++) {
3460 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003461 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003462 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003463 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003464}
3465
3466
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003467PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003468"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003469\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003470Return True if all characters in S are digits\n\
3471and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003472
3473static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003474string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003475{
Fred Drakeba096332000-07-09 07:04:36 +00003476 register const unsigned char *p
3477 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003478 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003479
Guido van Rossum4c08d552000-03-10 22:55:18 +00003480 /* Shortcut for single character strings */
3481 if (PyString_GET_SIZE(self) == 1 &&
3482 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003483 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003484
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003485 /* Special case for empty strings */
3486 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003487 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003488
Guido van Rossum4c08d552000-03-10 22:55:18 +00003489 e = p + PyString_GET_SIZE(self);
3490 for (; p < e; p++) {
3491 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003492 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003493 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003494 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003495}
3496
3497
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003498PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003499"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003500\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003501Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003502at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003503
3504static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003505string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003506{
Fred Drakeba096332000-07-09 07:04:36 +00003507 register const unsigned char *p
3508 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003509 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003510 int cased;
3511
Guido van Rossum4c08d552000-03-10 22:55:18 +00003512 /* Shortcut for single character strings */
3513 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003514 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003515
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003516 /* Special case for empty strings */
3517 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003518 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003519
Guido van Rossum4c08d552000-03-10 22:55:18 +00003520 e = p + PyString_GET_SIZE(self);
3521 cased = 0;
3522 for (; p < e; p++) {
3523 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003524 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003525 else if (!cased && islower(*p))
3526 cased = 1;
3527 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003528 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529}
3530
3531
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003532PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003533"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003535Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003536at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003537
3538static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003539string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003540{
Fred Drakeba096332000-07-09 07:04:36 +00003541 register const unsigned char *p
3542 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003543 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003544 int cased;
3545
Guido van Rossum4c08d552000-03-10 22:55:18 +00003546 /* Shortcut for single character strings */
3547 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003548 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003549
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003550 /* Special case for empty strings */
3551 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003552 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003553
Guido van Rossum4c08d552000-03-10 22:55:18 +00003554 e = p + PyString_GET_SIZE(self);
3555 cased = 0;
3556 for (; p < e; p++) {
3557 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003558 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003559 else if (!cased && isupper(*p))
3560 cased = 1;
3561 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003562 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003563}
3564
3565
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003566PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003567"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003568\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003569Return True if S is a titlecased string and there is at least one\n\
3570character in S, i.e. uppercase characters may only follow uncased\n\
3571characters and lowercase characters only cased ones. Return False\n\
3572otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003573
3574static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003575string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003576{
Fred Drakeba096332000-07-09 07:04:36 +00003577 register const unsigned char *p
3578 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003579 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003580 int cased, previous_is_cased;
3581
Guido van Rossum4c08d552000-03-10 22:55:18 +00003582 /* Shortcut for single character strings */
3583 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003584 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003585
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003586 /* Special case for empty strings */
3587 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003588 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003589
Guido van Rossum4c08d552000-03-10 22:55:18 +00003590 e = p + PyString_GET_SIZE(self);
3591 cased = 0;
3592 previous_is_cased = 0;
3593 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003594 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003595
3596 if (isupper(ch)) {
3597 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003598 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003599 previous_is_cased = 1;
3600 cased = 1;
3601 }
3602 else if (islower(ch)) {
3603 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003604 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003605 previous_is_cased = 1;
3606 cased = 1;
3607 }
3608 else
3609 previous_is_cased = 0;
3610 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612}
3613
3614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003615PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003616"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617\n\
3618Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003619Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003620is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003621
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622static PyObject*
3623string_splitlines(PyStringObject *self, PyObject *args)
3624{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003625 register Py_ssize_t i;
3626 register Py_ssize_t j;
3627 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003628 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629 PyObject *list;
3630 PyObject *str;
3631 char *data;
3632
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003633 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634 return NULL;
3635
3636 data = PyString_AS_STRING(self);
3637 len = PyString_GET_SIZE(self);
3638
Thomas Wouters477c8d52006-05-27 19:21:47 +00003639 /* This does not use the preallocated list because splitlines is
3640 usually run with hundreds of newlines. The overhead of
3641 switching between PyList_SET_ITEM and append causes about a
3642 2-3% slowdown for that common case. A smarter implementation
3643 could move the if check out, so the SET_ITEMs are done first
3644 and the appends only done when the prealloc buffer is full.
3645 That's too much work for little gain.*/
3646
Guido van Rossum4c08d552000-03-10 22:55:18 +00003647 list = PyList_New(0);
3648 if (!list)
3649 goto onError;
3650
3651 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003652 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003653
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654 /* Find a line and append it */
3655 while (i < len && data[i] != '\n' && data[i] != '\r')
3656 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657
3658 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003659 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660 if (i < len) {
3661 if (data[i] == '\r' && i + 1 < len &&
3662 data[i+1] == '\n')
3663 i += 2;
3664 else
3665 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003666 if (keepends)
3667 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003669 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670 j = i;
3671 }
3672 if (j < len) {
3673 SPLIT_APPEND(data, j, len);
3674 }
3675
3676 return list;
3677
3678 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003679 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680 return NULL;
3681}
3682
3683#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003684#undef SPLIT_ADD
3685#undef MAX_PREALLOC
3686#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003688static PyObject *
3689string_getnewargs(PyStringObject *v)
3690{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003691 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003692}
3693
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003694
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003695static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003696string_methods[] = {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003697 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3698 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003699 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003700 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3701 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003702 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3703 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3704 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3705 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3706 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3707 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3708 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003709 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3710 capitalize__doc__},
3711 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3712 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3713 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003714 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003715 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3716 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3717 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3718 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3719 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3720 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3721 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003722 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3723 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003724 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3725 startswith__doc__},
3726 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3727 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3728 swapcase__doc__},
3729 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3730 translate__doc__},
3731 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3732 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3733 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3734 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3735 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3736 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3737 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3738 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3739 expandtabs__doc__},
3740 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3741 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003742 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003743 {NULL, NULL} /* sentinel */
3744};
3745
Jeremy Hylton938ace62002-07-17 16:30:39 +00003746static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003747str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3748
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003749static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003750string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003751{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003752 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003753 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003754
Guido van Rossumae960af2001-08-30 03:11:59 +00003755 if (type != &PyString_Type)
3756 return str_subtype_new(type, args, kwds);
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003757 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
Tim Peters6d6c1a32001-08-02 04:15:00 +00003758 return NULL;
3759 if (x == NULL)
3760 return PyString_FromString("");
3761 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003762}
3763
Guido van Rossumae960af2001-08-30 03:11:59 +00003764static PyObject *
3765str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3766{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003767 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003768 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003769
3770 assert(PyType_IsSubtype(type, &PyString_Type));
3771 tmp = string_new(&PyString_Type, args, kwds);
3772 if (tmp == NULL)
3773 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003774 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003775 n = PyString_GET_SIZE(tmp);
3776 pnew = type->tp_alloc(type, n);
3777 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003778 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003779 ((PyStringObject *)pnew)->ob_shash =
3780 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003781 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003782 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003783 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003784 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003785}
3786
Guido van Rossumcacfc072002-05-24 19:01:59 +00003787static PyObject *
3788basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3789{
3790 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003791 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003792 return NULL;
3793}
3794
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003795static PyObject *
3796string_mod(PyObject *v, PyObject *w)
3797{
3798 if (!PyString_Check(v)) {
3799 Py_INCREF(Py_NotImplemented);
3800 return Py_NotImplemented;
3801 }
3802 return PyString_Format(v, w);
3803}
3804
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003805PyDoc_STRVAR(basestring_doc,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003806"Type basestring cannot be instantiated; it is the base for str8 and str.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003807
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003808static PyNumberMethods string_as_number = {
3809 0, /*nb_add*/
3810 0, /*nb_subtract*/
3811 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003812 string_mod, /*nb_remainder*/
3813};
3814
3815
Guido van Rossumcacfc072002-05-24 19:01:59 +00003816PyTypeObject PyBaseString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003817 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003818 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003819 0,
3820 0,
3821 0, /* tp_dealloc */
3822 0, /* tp_print */
3823 0, /* tp_getattr */
3824 0, /* tp_setattr */
3825 0, /* tp_compare */
3826 0, /* tp_repr */
3827 0, /* tp_as_number */
3828 0, /* tp_as_sequence */
3829 0, /* tp_as_mapping */
3830 0, /* tp_hash */
3831 0, /* tp_call */
3832 0, /* tp_str */
3833 0, /* tp_getattro */
3834 0, /* tp_setattro */
3835 0, /* tp_as_buffer */
3836 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3837 basestring_doc, /* tp_doc */
3838 0, /* tp_traverse */
3839 0, /* tp_clear */
3840 0, /* tp_richcompare */
3841 0, /* tp_weaklistoffset */
3842 0, /* tp_iter */
3843 0, /* tp_iternext */
3844 0, /* tp_methods */
3845 0, /* tp_members */
3846 0, /* tp_getset */
3847 &PyBaseObject_Type, /* tp_base */
3848 0, /* tp_dict */
3849 0, /* tp_descr_get */
3850 0, /* tp_descr_set */
3851 0, /* tp_dictoffset */
3852 0, /* tp_init */
3853 0, /* tp_alloc */
3854 basestring_new, /* tp_new */
3855 0, /* tp_free */
3856};
3857
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003858PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003859"str(object) -> string\n\
3860\n\
3861Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003862If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003863
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003864static PyObject *str_iter(PyObject *seq);
3865
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003866PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003867 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Walter Dörwald5d7a7002007-05-03 20:49:27 +00003868 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003869 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003870 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003871 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003872 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003873 0, /* tp_getattr */
3874 0, /* tp_setattr */
3875 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003876 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003877 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003878 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003879 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003880 (hashfunc)string_hash, /* tp_hash */
3881 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003882 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003883 PyObject_GenericGetAttr, /* tp_getattro */
3884 0, /* tp_setattro */
3885 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003886 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3887 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003888 string_doc, /* tp_doc */
3889 0, /* tp_traverse */
3890 0, /* tp_clear */
3891 (richcmpfunc)string_richcompare, /* tp_richcompare */
3892 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003893 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003894 0, /* tp_iternext */
3895 string_methods, /* tp_methods */
3896 0, /* tp_members */
3897 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003898 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003899 0, /* tp_dict */
3900 0, /* tp_descr_get */
3901 0, /* tp_descr_set */
3902 0, /* tp_dictoffset */
3903 0, /* tp_init */
3904 0, /* tp_alloc */
3905 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003906 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003907};
3908
3909void
Fred Drakeba096332000-07-09 07:04:36 +00003910PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003911{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003912 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003913 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003914 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003915 if (w == NULL || !PyString_Check(*pv)) {
3916 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003917 *pv = NULL;
3918 return;
3919 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003920 v = string_concat((PyStringObject *) *pv, w);
3921 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003922 *pv = v;
3923}
3924
Guido van Rossum013142a1994-08-30 08:19:36 +00003925void
Fred Drakeba096332000-07-09 07:04:36 +00003926PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003927{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003928 PyString_Concat(pv, w);
3929 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003930}
3931
3932
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003933/* The following function breaks the notion that strings are immutable:
3934 it changes the size of a string. We get away with this only if there
3935 is only one module referencing the object. You can also think of it
3936 as creating a new string object and destroying the old one, only
3937 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003938 already be known to some other part of the code...
3939 Note that if there's not enough memory to resize the string, the original
3940 string object at *pv is deallocated, *pv is set to NULL, an "out of
3941 memory" exception is set, and -1 is returned. Else (on success) 0 is
3942 returned, and the value in *pv may or may not be the same as on input.
3943 As always, an extra byte is allocated for a trailing \0 byte (newsize
3944 does *not* include that), and a trailing \0 byte is stored.
3945*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003946
3947int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003948_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003949{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003950 register PyObject *v;
3951 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003952 v = *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003953 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00003954 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003955 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003956 Py_DECREF(v);
3957 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003958 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003959 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003960 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003961 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003962 _Py_ForgetReference(v);
3963 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003964 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003965 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003966 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003967 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003968 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003969 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003970 _Py_NewReference(*pv);
3971 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003972 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003973 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003974 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003975 return 0;
3976}
Guido van Rossume5372401993-03-16 12:15:04 +00003977
3978/* Helpers for formatstring */
3979
Thomas Wouters477c8d52006-05-27 19:21:47 +00003980Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00003981getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003982{
Thomas Wouters977485d2006-02-16 15:59:12 +00003983 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003984 if (argidx < arglen) {
3985 (*p_argidx)++;
3986 if (arglen < 0)
3987 return args;
3988 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003989 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003990 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003991 PyErr_SetString(PyExc_TypeError,
3992 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003993 return NULL;
3994}
3995
/* Format flag bits; each one corresponds to a flag character of a
   format specification. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4008
Thomas Wouters477c8d52006-05-27 19:21:47 +00004009Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004010formatfloat(char *buf, size_t buflen, int flags,
4011 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004012{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004013 /* fmt = '%#.' + `prec` + `type`
4014 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004015 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004016 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004017 x = PyFloat_AsDouble(v);
4018 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004019 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004020 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004021 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004022 }
Guido van Rossume5372401993-03-16 12:15:04 +00004023 if (prec < 0)
4024 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004025 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4026 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004027 /* Worst case length calc to ensure no buffer overrun:
4028
4029 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004030 fmt = %#.<prec>g
4031 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004032 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004033 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004034
4035 'f' formats:
4036 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4037 len = 1 + 50 + 1 + prec = 52 + prec
4038
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004039 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004040 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004041
4042 */
Guido van Rossumb5a755e2007-07-18 18:15:48 +00004043 if (((type == 'g' || type == 'G') &&
4044 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004045 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004046 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004047 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004048 return -1;
4049 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004050 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4051 (flags&F_ALT) ? "#" : "",
4052 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004053 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004054 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004055}
4056
/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
 * Python's regular ints.
 * Return value:  a new PyString*, or NULL if error.
 *     .  *pbuf is set to point into it,
 *        *plen set to the # of chars following that.
 *        Caller must decref it when done using pbuf.
 *        The string starting at *pbuf is of the form
 *            "-"? ("0x" | "0X")? digit+
 *        "0x"/"0X" are present only for x and X conversions, with F_ALT
 *        set in flags.  The case of hex digits will be correct.
 *        There will be at least prec digits, zero-filled on the left if
 *        necessary to get that many.
 * val          object to be converted
 * flags        bitmask of format flags; only F_ALT is looked at
 * prec         minimum number of digits; 0-fill on left if needed
 * type         a character in [duoxX]; u acts the same as d
 *
 * CAUTION:  o, x and X conversions on regular ints can never
 * produce a '-' sign, but can for Python's unbounded ints.
 */
4078PyObject*
4079_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4080 char **pbuf, int *plen)
4081{
4082 PyObject *result = NULL;
4083 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004084 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004085 int sign; /* 1 if '-', else 0 */
4086 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004087 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004088 int numdigits; /* len == numnondigits + numdigits */
4089 int numnondigits = 0;
4090
Guido van Rossumddefaf32007-01-14 03:31:43 +00004091 /* Avoid exceeding SSIZE_T_MAX */
4092 if (prec > PY_SSIZE_T_MAX-3) {
4093 PyErr_SetString(PyExc_OverflowError,
4094 "precision too large");
4095 return NULL;
4096 }
4097
Tim Peters38fd5b62000-09-21 05:43:11 +00004098 switch (type) {
4099 case 'd':
4100 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00004101 /* Special-case boolean: we want 0/1 */
4102 if (PyBool_Check(val))
4103 result = PyNumber_ToBase(val, 10);
4104 else
4105 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004106 break;
4107 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004108 numnondigits = 2;
4109 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00004110 break;
4111 case 'x':
4112 case 'X':
4113 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004114 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00004115 break;
4116 default:
4117 assert(!"'type' not in [duoxX]");
4118 }
4119 if (!result)
4120 return NULL;
4121
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004122 buf = PyString_AsString(result);
4123 if (!buf) {
4124 Py_DECREF(result);
4125 return NULL;
4126 }
4127
Tim Peters38fd5b62000-09-21 05:43:11 +00004128 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004129 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004130 PyErr_BadInternalCall();
4131 return NULL;
4132 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004133 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004134 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004135 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4136 return NULL;
4137 }
4138 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004139 if (buf[len-1] == 'L') {
4140 --len;
4141 buf[len] = '\0';
4142 }
4143 sign = buf[0] == '-';
4144 numnondigits += sign;
4145 numdigits = len - numnondigits;
4146 assert(numdigits > 0);
4147
Tim Petersfff53252001-04-12 18:38:48 +00004148 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004149 if (((flags & F_ALT) == 0 &&
4150 (type == 'o' || type == 'x' || type == 'X'))) {
4151 assert(buf[sign] == '0');
4152 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
4153 buf[sign+1] == 'o');
4154 numnondigits -= 2;
4155 buf += 2;
4156 len -= 2;
4157 if (sign)
4158 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00004159 assert(len == numnondigits + numdigits);
4160 assert(numdigits > 0);
4161 }
4162
4163 /* Fill with leading zeroes to meet minimum width. */
4164 if (prec > numdigits) {
4165 PyObject *r1 = PyString_FromStringAndSize(NULL,
4166 numnondigits + prec);
4167 char *b1;
4168 if (!r1) {
4169 Py_DECREF(result);
4170 return NULL;
4171 }
4172 b1 = PyString_AS_STRING(r1);
4173 for (i = 0; i < numnondigits; ++i)
4174 *b1++ = *buf++;
4175 for (i = 0; i < prec - numdigits; i++)
4176 *b1++ = '0';
4177 for (i = 0; i < numdigits; i++)
4178 *b1++ = *buf++;
4179 *b1 = '\0';
4180 Py_DECREF(result);
4181 result = r1;
4182 buf = PyString_AS_STRING(result);
4183 len = numnondigits + prec;
4184 }
4185
4186 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004187 if (type == 'X') {
4188 /* Need to convert all lower case letters to upper case.
4189 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004190 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004191 if (buf[i] >= 'a' && buf[i] <= 'x')
4192 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004193 }
4194 *pbuf = buf;
4195 *plen = len;
4196 return result;
4197}
4198
Thomas Wouters477c8d52006-05-27 19:21:47 +00004199Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004200formatint(char *buf, size_t buflen, int flags,
4201 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004202{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004203 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004204 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4205 + 1 + 1 = 24 */
4206 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004207 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004208 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004209
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004210 x = PyInt_AsLong(v);
4211 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004212 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004213 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004214 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004215 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004216 if (x < 0 && type == 'u') {
4217 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004218 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004219 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4220 sign = "-";
4221 else
4222 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004223 if (prec < 0)
4224 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004225
4226 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004227 (type == 'x' || type == 'X' || type == 'o')) {
4228 /* When converting under %#o, %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004229 * of issues that cause pain:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004230 * - for %#o, we want a different base marker than C
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004231 * - when 0 is being converted, the C standard leaves off
4232 * the '0x' or '0X', which is inconsistent with other
4233 * %#x/%#X conversions and inconsistent with Python's
4234 * hex() function
4235 * - there are platforms that violate the standard and
4236 * convert 0 with the '0x' or '0X'
4237 * (Metrowerks, Compaq Tru64)
4238 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004239 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004240 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004241 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004242 * We can achieve the desired consistency by inserting our
4243 * own '0x' or '0X' prefix, and substituting %x/%X in place
4244 * of %#x/%#X.
4245 *
4246 * Note that this is the same approach as used in
4247 * formatint() in unicodeobject.c
4248 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004249 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4250 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004251 }
4252 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004253 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4254 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004255 prec, type);
4256 }
4257
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004258 /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004259 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004260 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004261 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004262 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004263 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004264 return -1;
4265 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004266 if (sign[0])
4267 PyOS_snprintf(buf, buflen, fmt, -x);
4268 else
4269 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004270 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004271}
4272
Thomas Wouters477c8d52006-05-27 19:21:47 +00004273Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004274formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004275{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004276 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004277 if (PyString_Check(v)) {
4278 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004279 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004280 }
4281 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004282 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004283 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004284 }
4285 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004286 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004287}
4288
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004298
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004299PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004300PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004301{
4302 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004303 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004304 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004305 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004306 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004307 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004308 PyObject *dict = NULL;
4309 if (format == NULL || !PyString_Check(format) || args == NULL) {
4310 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004311 return NULL;
4312 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004313 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004314 fmt = PyString_AS_STRING(format);
4315 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004316 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004317 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004318 if (result == NULL)
4319 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004320 res = PyString_AsString(result);
4321 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004322 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004323 argidx = 0;
4324 }
4325 else {
4326 arglen = -1;
4327 argidx = -2;
4328 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004329 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004330 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004331 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004332 while (--fmtcnt >= 0) {
4333 if (*fmt != '%') {
4334 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004335 rescnt = fmtcnt + 100;
4336 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004337 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004338 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004339 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004340 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004341 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004342 }
4343 *res++ = *fmt++;
4344 }
4345 else {
4346 /* Got a format specifier */
4347 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004348 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004349 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004350 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004351 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004352 PyObject *v = NULL;
4353 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004354 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004355 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004356 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004357 char formatbuf[FORMATBUFLEN];
4358 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00004359 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004360 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004361
Guido van Rossumda9c2711996-12-05 21:58:58 +00004362 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004363 if (*fmt == '(') {
4364 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004365 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004366 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004367 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004368
4369 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004370 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004371 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004372 goto error;
4373 }
4374 ++fmt;
4375 --fmtcnt;
4376 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004377 /* Skip over balanced parentheses */
4378 while (pcount > 0 && --fmtcnt >= 0) {
4379 if (*fmt == ')')
4380 --pcount;
4381 else if (*fmt == '(')
4382 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004383 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004384 }
4385 keylen = fmt - keystart - 1;
4386 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004387 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004388 "incomplete format key");
4389 goto error;
4390 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004391 key = PyString_FromStringAndSize(keystart,
4392 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004393 if (key == NULL)
4394 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004395 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004396 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004397 args_owned = 0;
4398 }
4399 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004400 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004401 if (args == NULL) {
4402 goto error;
4403 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004404 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004405 arglen = -1;
4406 argidx = -2;
4407 }
Guido van Rossume5372401993-03-16 12:15:04 +00004408 while (--fmtcnt >= 0) {
4409 switch (c = *fmt++) {
4410 case '-': flags |= F_LJUST; continue;
4411 case '+': flags |= F_SIGN; continue;
4412 case ' ': flags |= F_BLANK; continue;
4413 case '#': flags |= F_ALT; continue;
4414 case '0': flags |= F_ZERO; continue;
4415 }
4416 break;
4417 }
4418 if (c == '*') {
4419 v = getnextarg(args, arglen, &argidx);
4420 if (v == NULL)
4421 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004422 if (!PyInt_Check(v)) {
4423 PyErr_SetString(PyExc_TypeError,
4424 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004425 goto error;
4426 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004427 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004428 if (width == -1 && PyErr_Occurred())
4429 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004430 if (width < 0) {
4431 flags |= F_LJUST;
4432 width = -width;
4433 }
Guido van Rossume5372401993-03-16 12:15:04 +00004434 if (--fmtcnt >= 0)
4435 c = *fmt++;
4436 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004437 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004438 width = c - '0';
4439 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004440 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004441 if (!isdigit(c))
4442 break;
4443 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004444 PyErr_SetString(
4445 PyExc_ValueError,
4446 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004447 goto error;
4448 }
4449 width = width*10 + (c - '0');
4450 }
4451 }
4452 if (c == '.') {
4453 prec = 0;
4454 if (--fmtcnt >= 0)
4455 c = *fmt++;
4456 if (c == '*') {
4457 v = getnextarg(args, arglen, &argidx);
4458 if (v == NULL)
4459 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004460 if (!PyInt_Check(v)) {
4461 PyErr_SetString(
4462 PyExc_TypeError,
4463 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004464 goto error;
4465 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004466 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004467 if (prec == -1 && PyErr_Occurred())
4468 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004469 if (prec < 0)
4470 prec = 0;
4471 if (--fmtcnt >= 0)
4472 c = *fmt++;
4473 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004474 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004475 prec = c - '0';
4476 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004477 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004478 if (!isdigit(c))
4479 break;
4480 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004481 PyErr_SetString(
4482 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004483 "prec too big");
4484 goto error;
4485 }
4486 prec = prec*10 + (c - '0');
4487 }
4488 }
4489 } /* prec */
4490 if (fmtcnt >= 0) {
4491 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004492 if (--fmtcnt >= 0)
4493 c = *fmt++;
4494 }
4495 }
4496 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004497 PyErr_SetString(PyExc_ValueError,
4498 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004499 goto error;
4500 }
4501 if (c != '%') {
4502 v = getnextarg(args, arglen, &argidx);
4503 if (v == NULL)
4504 goto error;
4505 }
4506 sign = 0;
4507 fill = ' ';
4508 switch (c) {
4509 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004510 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004511 len = 1;
4512 break;
4513 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00004514 if (PyUnicode_Check(v)) {
4515 fmt = fmt_start;
4516 argidx = argidx_start;
4517 goto unicode;
4518 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00004519 temp = _PyObject_Str(v);
4520 if (temp != NULL && PyUnicode_Check(temp)) {
4521 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004522 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004523 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004524 goto unicode;
4525 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004526 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004527 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004528 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00004529 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004530 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004531 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004532 if (!PyString_Check(temp)) {
4533 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00004534 "%s argument has non-string str()/repr()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004535 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004536 goto error;
4537 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004538 pbuf = PyString_AS_STRING(temp);
4539 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004540 if (prec >= 0 && len > prec)
4541 len = prec;
4542 break;
4543 case 'i':
4544 case 'd':
4545 case 'u':
4546 case 'o':
4547 case 'x':
4548 case 'X':
4549 if (c == 'i')
4550 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004551 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004552 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004553 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004554 prec, c, &pbuf, &ilen);
4555 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004556 if (!temp)
4557 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004558 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004559 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004560 else {
4561 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004562 len = formatint(pbuf,
4563 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004564 flags, prec, c, v);
4565 if (len < 0)
4566 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004567 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004568 }
4569 if (flags & F_ZERO)
4570 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004571 break;
4572 case 'e':
4573 case 'E':
4574 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004575 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004576 case 'g':
4577 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004578 if (c == 'F')
4579 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004580 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004581 len = formatfloat(pbuf, sizeof(formatbuf),
4582 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004583 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004584 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004585 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004586 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004587 fill = '0';
4588 break;
4589 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004590 if (PyUnicode_Check(v)) {
4591 fmt = fmt_start;
4592 argidx = argidx_start;
4593 goto unicode;
4594 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004595 pbuf = formatbuf;
4596 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004597 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004598 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004599 break;
4600 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004601 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004602 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004603 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004604 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004605 (Py_ssize_t)(fmt - 1 -
4606 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004607 goto error;
4608 }
4609 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004610 if (*pbuf == '-' || *pbuf == '+') {
4611 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004612 len--;
4613 }
4614 else if (flags & F_SIGN)
4615 sign = '+';
4616 else if (flags & F_BLANK)
4617 sign = ' ';
4618 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004619 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004620 }
4621 if (width < len)
4622 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004623 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004624 reslen -= rescnt;
4625 rescnt = width + fmtcnt + 100;
4626 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004627 if (reslen < 0) {
4628 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004629 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004630 return PyErr_NoMemory();
4631 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004632 if (_PyString_Resize(&result, reslen) < 0) {
4633 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004634 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004635 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004636 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004637 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004638 }
4639 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004640 if (fill != ' ')
4641 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004642 rescnt--;
4643 if (width > len)
4644 width--;
4645 }
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004646 if ((flags & F_ALT) &&
4647 (c == 'x' || c == 'X' || c == 'o')) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004648 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004649 assert(pbuf[1] == c);
4650 if (fill != ' ') {
4651 *res++ = *pbuf++;
4652 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004653 }
Tim Petersfff53252001-04-12 18:38:48 +00004654 rescnt -= 2;
4655 width -= 2;
4656 if (width < 0)
4657 width = 0;
4658 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004659 }
4660 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004661 do {
4662 --rescnt;
4663 *res++ = fill;
4664 } while (--width > len);
4665 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004666 if (fill == ' ') {
4667 if (sign)
4668 *res++ = sign;
4669 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004670 (c == 'x' || c == 'X' || c == 'o')) {
Tim Petersfff53252001-04-12 18:38:48 +00004671 assert(pbuf[0] == '0');
4672 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004673 *res++ = *pbuf++;
4674 *res++ = *pbuf++;
4675 }
4676 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004677 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004678 res += len;
4679 rescnt -= len;
4680 while (--width >= len) {
4681 --rescnt;
4682 *res++ = ' ';
4683 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004684 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004685 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004686 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004687 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004688 goto error;
4689 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004690 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004691 } /* '%' */
4692 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004693 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004694 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004695 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004696 goto error;
4697 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004698 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004699 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004700 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004701 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004702 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004703
4704 unicode:
4705 if (args_owned) {
4706 Py_DECREF(args);
4707 args_owned = 0;
4708 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004709 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004710 if (PyTuple_Check(orig_args) && argidx > 0) {
4711 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004712 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004713 v = PyTuple_New(n);
4714 if (v == NULL)
4715 goto error;
4716 while (--n >= 0) {
4717 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4718 Py_INCREF(w);
4719 PyTuple_SET_ITEM(v, n, w);
4720 }
4721 args = v;
4722 } else {
4723 Py_INCREF(orig_args);
4724 args = orig_args;
4725 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004726 args_owned = 1;
4727 /* Take what we have of the result and let the Unicode formatting
4728 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004729 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004730 if (_PyString_Resize(&result, rescnt))
4731 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004732 fmtcnt = PyString_GET_SIZE(format) - \
4733 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004734 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4735 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004736 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004737 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004738 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004739 if (v == NULL)
4740 goto error;
4741 /* Paste what we have (result) to what the Unicode formatting
4742 function returned (v) and return the result (or error) */
4743 w = PyUnicode_Concat(result, v);
4744 Py_DECREF(result);
4745 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004746 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004747 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004748
Guido van Rossume5372401993-03-16 12:15:04 +00004749 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004750 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004751 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004752 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004753 }
Guido van Rossume5372401993-03-16 12:15:04 +00004754 return NULL;
4755}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004756
Guido van Rossum2a61e741997-01-18 07:55:05 +00004757void
Fred Drakeba096332000-07-09 07:04:36 +00004758PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004759{
4760 register PyStringObject *s = (PyStringObject *)(*p);
4761 PyObject *t;
4762 if (s == NULL || !PyString_Check(s))
4763 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004764 /* If it's a string subclass, we don't really know what putting
4765 it in the interned dict might do. */
4766 if (!PyString_CheckExact(s))
4767 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004768 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004769 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004770 if (interned == NULL) {
4771 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004772 if (interned == NULL) {
4773 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004774 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004775 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004776 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004777 t = PyDict_GetItem(interned, (PyObject *)s);
4778 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004779 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004780 Py_DECREF(*p);
4781 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004782 return;
4783 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004784
Armin Rigo79f7ad22004-08-07 19:27:39 +00004785 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004786 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004787 return;
4788 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004789 /* The two references in interned are not counted by refcnt.
4790 The string deallocator will take care of this */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004791 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004792 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004793}
4794
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004795void
4796PyString_InternImmortal(PyObject **p)
4797{
4798 PyString_InternInPlace(p);
4799 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4800 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4801 Py_INCREF(*p);
4802 }
4803}
4804
Guido van Rossum2a61e741997-01-18 07:55:05 +00004805
4806PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004807PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004808{
4809 PyObject *s = PyString_FromString(cp);
4810 if (s == NULL)
4811 return NULL;
4812 PyString_InternInPlace(&s);
4813 return s;
4814}
4815
Guido van Rossum8cf04761997-08-02 02:57:45 +00004816void
Fred Drakeba096332000-07-09 07:04:36 +00004817PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004818{
4819 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004820 for (i = 0; i < UCHAR_MAX + 1; i++) {
4821 Py_XDECREF(characters[i]);
4822 characters[i] = NULL;
4823 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004824 Py_XDECREF(nullstring);
4825 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004826}
Barry Warsawa903ad982001-02-23 16:40:48 +00004827
Barry Warsawa903ad982001-02-23 16:40:48 +00004828void _Py_ReleaseInternedStrings(void)
4829{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004830 PyObject *keys;
4831 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004832 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004833 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004834
4835 if (interned == NULL || !PyDict_Check(interned))
4836 return;
4837 keys = PyDict_Keys(interned);
4838 if (keys == NULL || !PyList_Check(keys)) {
4839 PyErr_Clear();
4840 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004841 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004842
4843 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4844 detector, interned strings are not forcibly deallocated; rather, we
4845 give them their stolen references back, and then clear and DECREF
4846 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004847
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004848 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004849 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4850 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004851 for (i = 0; i < n; i++) {
4852 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4853 switch (s->ob_sstate) {
4854 case SSTATE_NOT_INTERNED:
4855 /* XXX Shouldn't happen */
4856 break;
4857 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004858 Py_Refcnt(s) += 1;
4859 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004860 break;
4861 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004862 Py_Refcnt(s) += 2;
4863 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004864 break;
4865 default:
4866 Py_FatalError("Inconsistent interned string state.");
4867 }
4868 s->ob_sstate = SSTATE_NOT_INTERNED;
4869 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004870 fprintf(stderr, "total size of all interned strings: "
4871 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4872 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004873 Py_DECREF(keys);
4874 PyDict_Clear(interned);
4875 Py_DECREF(interned);
4876 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004877}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004878
4879
4880/*********************** Str Iterator ****************************/
4881
4882typedef struct {
4883 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00004884 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004885 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4886} striterobject;
4887
4888static void
4889striter_dealloc(striterobject *it)
4890{
4891 _PyObject_GC_UNTRACK(it);
4892 Py_XDECREF(it->it_seq);
4893 PyObject_GC_Del(it);
4894}
4895
4896static int
4897striter_traverse(striterobject *it, visitproc visit, void *arg)
4898{
4899 Py_VISIT(it->it_seq);
4900 return 0;
4901}
4902
4903static PyObject *
4904striter_next(striterobject *it)
4905{
4906 PyStringObject *seq;
4907 PyObject *item;
4908
4909 assert(it != NULL);
4910 seq = it->it_seq;
4911 if (seq == NULL)
4912 return NULL;
4913 assert(PyString_Check(seq));
4914
4915 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004916 item = PyString_FromStringAndSize(
4917 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004918 if (item != NULL)
4919 ++it->it_index;
4920 return item;
4921 }
4922
4923 Py_DECREF(seq);
4924 it->it_seq = NULL;
4925 return NULL;
4926}
4927
4928static PyObject *
4929striter_len(striterobject *it)
4930{
4931 Py_ssize_t len = 0;
4932 if (it->it_seq)
4933 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
4934 return PyInt_FromSsize_t(len);
4935}
4936
Guido van Rossum49d6b072006-08-17 21:11:47 +00004937PyDoc_STRVAR(length_hint_doc,
4938 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004939
4940static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004941 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
4942 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004943 {NULL, NULL} /* sentinel */
4944};
4945
4946PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004947 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum49d6b072006-08-17 21:11:47 +00004948 "striterator", /* tp_name */
4949 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004950 0, /* tp_itemsize */
4951 /* methods */
4952 (destructor)striter_dealloc, /* tp_dealloc */
4953 0, /* tp_print */
4954 0, /* tp_getattr */
4955 0, /* tp_setattr */
4956 0, /* tp_compare */
4957 0, /* tp_repr */
4958 0, /* tp_as_number */
4959 0, /* tp_as_sequence */
4960 0, /* tp_as_mapping */
4961 0, /* tp_hash */
4962 0, /* tp_call */
4963 0, /* tp_str */
4964 PyObject_GenericGetAttr, /* tp_getattro */
4965 0, /* tp_setattro */
4966 0, /* tp_as_buffer */
4967 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4968 0, /* tp_doc */
4969 (traverseproc)striter_traverse, /* tp_traverse */
4970 0, /* tp_clear */
4971 0, /* tp_richcompare */
4972 0, /* tp_weaklistoffset */
4973 PyObject_SelfIter, /* tp_iter */
4974 (iternextfunc)striter_next, /* tp_iternext */
4975 striter_methods, /* tp_methods */
4976 0,
4977};
4978
4979static PyObject *
4980str_iter(PyObject *seq)
4981{
4982 striterobject *it;
4983
4984 if (!PyString_Check(seq)) {
4985 PyErr_BadInternalCall();
4986 return NULL;
4987 }
4988 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
4989 if (it == NULL)
4990 return NULL;
4991 it->it_index = 0;
4992 Py_INCREF(seq);
4993 it->it_seq = (PyStringObject *)seq;
4994 _PyObject_GC_TRACK(it);
4995 return (PyObject *)it;
4996}