blob: fff4b4593aeb3721e221881a7ec503895cb223fa [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000417 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000490 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000510 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000522 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000609 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 && isxdigit(Py_CHARMASK(s[1]))) {
611 unsigned int x = 0;
612 c = Py_CHARMASK(*s);
613 s++;
614 if (isdigit(c))
615 x = c - '0';
616 else if (islower(c))
617 x = 10 + c - 'a';
618 else
619 x = 10 + c - 'A';
620 x = x << 4;
621 c = Py_CHARMASK(*s);
622 s++;
623 if (isdigit(c))
624 x += c - '0';
625 else if (islower(c))
626 x += 10 + c - 'a';
627 else
628 x += 10 + c - 'A';
629 *p++ = x;
630 break;
631 }
632 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000633 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000635 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000636 }
637 if (strcmp(errors, "replace") == 0) {
638 *p++ = '?';
639 } else if (strcmp(errors, "ignore") == 0)
640 /* do nothing */;
641 else {
642 PyErr_Format(PyExc_ValueError,
643 "decoding error; "
644 "unknown error handling code: %.400s",
645 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000646 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 default:
649 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000650 s--;
651 goto non_esc; /* an arbitry number of unescaped
652 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 }
654 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000655 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 return v;
658 failed:
659 Py_DECREF(v);
660 return NULL;
661}
662
Thomas Wouters477c8d52006-05-27 19:21:47 +0000663/* -------------------------------------------------------------------- */
664/* object api */
665
Martin v. Löwis18e16552006-02-15 17:27:45 +0000666static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667string_getsize(register PyObject *op)
668{
669 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000670 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000671 if (PyString_AsStringAndSize(op, &s, &len))
672 return -1;
673 return len;
674}
675
676static /*const*/ char *
677string_getbuffer(register PyObject *op)
678{
679 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000680 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000681 if (PyString_AsStringAndSize(op, &s, &len))
682 return NULL;
683 return s;
684}
685
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000687PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000688{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000689 if (PyUnicode_Check(op)) {
690 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
691 if (!op)
692 return -1;
693 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694 if (!PyString_Check(op))
695 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000696 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697}
698
699/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000700PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000702 if (PyUnicode_Check(op)) {
703 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
704 if (!op)
705 return NULL;
706 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (!PyString_Check(op))
708 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000709 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000710}
711
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712int
713PyString_AsStringAndSize(register PyObject *obj,
714 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716{
717 if (s == NULL) {
718 PyErr_BadInternalCall();
719 return -1;
720 }
721
722 if (!PyString_Check(obj)) {
723 if (PyUnicode_Check(obj)) {
724 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
725 if (obj == NULL)
726 return -1;
727 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000728 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000729 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000731 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000732 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733 return -1;
734 }
735 }
736
737 *s = PyString_AS_STRING(obj);
738 if (len != NULL)
739 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000740 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 PyErr_SetString(PyExc_TypeError,
742 "expected string without null bytes");
743 return -1;
744 }
745 return 0;
746}
747
Thomas Wouters477c8d52006-05-27 19:21:47 +0000748/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749/* Methods */
750
Thomas Wouters477c8d52006-05-27 19:21:47 +0000751#define STRINGLIB_CHAR char
752
753#define STRINGLIB_CMP memcmp
754#define STRINGLIB_LEN PyString_GET_SIZE
755#define STRINGLIB_NEW PyString_FromStringAndSize
756#define STRINGLIB_STR PyString_AS_STRING
757
758#define STRINGLIB_EMPTY nullstring
759
760#include "stringlib/fastsearch.h"
761
762#include "stringlib/count.h"
763#include "stringlib/find.h"
764#include "stringlib/partition.h"
765
766
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000767PyObject *
768PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000770 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000771 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis5b222132007-06-10 09:51:05 +0000772 Py_ssize_t length = PyString_GET_SIZE(op);
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000773 size_t newsize = 3 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000774 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000775 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000776 PyErr_SetString(PyExc_OverflowError,
777 "string is too large to make repr");
778 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000779 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000780 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000781 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782 }
783 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000784 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000785 register Py_UNICODE c;
786 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000787 int quote;
788
Thomas Wouters7e474022000-07-16 12:04:32 +0000789 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000790 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000791 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000792 char *test, *start;
793 start = PyString_AS_STRING(op);
794 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000795 if (*test == '"') {
796 quote = '\''; /* switch back to single quote */
797 goto decided;
798 }
799 else if (*test == '\'')
800 quote = '"';
801 }
802 decided:
803 ;
804 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000805
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000806 *p++ = 's', *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000807 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000808 /* There's at least enough room for a hex escape
809 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000810 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000812 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000814 else if (c == '\t')
815 *p++ = '\\', *p++ = 't';
816 else if (c == '\n')
817 *p++ = '\\', *p++ = 'n';
818 else if (c == '\r')
819 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000820 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000821 *p++ = '\\';
822 *p++ = 'x';
823 *p++ = hexdigits[(c & 0xf0) >> 4];
824 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000825 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 else
827 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000829 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000832 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
833 Py_DECREF(v);
834 return NULL;
835 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000836 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838}
839
Guido van Rossum189f1df2001-05-01 16:51:53 +0000840static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841string_repr(PyObject *op)
842{
843 return PyString_Repr(op, 1);
844}
845
846static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000847string_str(PyObject *s)
848{
Tim Petersc9933152001-10-16 20:18:24 +0000849 assert(PyString_Check(s));
850 if (PyString_CheckExact(s)) {
851 Py_INCREF(s);
852 return s;
853 }
854 else {
855 /* Subtype -- return genuine string with the same value. */
856 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000857 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000858 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000859}
860
Martin v. Löwis18e16552006-02-15 17:27:45 +0000861static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000862string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000863{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000864 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865}
866
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000867static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000868string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000870 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000871 register PyStringObject *op;
872 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000873 if (PyUnicode_Check(bb))
874 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000875 if (PyBytes_Check(bb))
876 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000877 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000878 "cannot concatenate 'str8' and '%.200s' objects",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000879 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880 return NULL;
881 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000882#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000883 /* Optimize cases with empty left or right operand */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000884 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000885 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000886 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000887 Py_INCREF(bb);
888 return bb;
889 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890 Py_INCREF(a);
891 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000893 size = Py_Size(a) + Py_Size(b);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000894 if (size < 0) {
895 PyErr_SetString(PyExc_OverflowError,
896 "strings are too large to concat");
897 return NULL;
898 }
899
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000900 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000901 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000902 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000903 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000904 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000905 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000906 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000907 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
908 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000909 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000910 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911#undef b
912}
913
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000915string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000917 register Py_ssize_t i;
918 register Py_ssize_t j;
919 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000920 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000921 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922 if (n < 0)
923 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000924 /* watch out for overflows: the size can overflow int,
925 * and the # of bytes needed can overflow size_t
926 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000927 size = Py_Size(a) * n;
928 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000929 PyErr_SetString(PyExc_OverflowError,
930 "repeated string is too long");
931 return NULL;
932 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000933 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 Py_INCREF(a);
935 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936 }
Tim Peterse7c05322004-06-27 17:24:49 +0000937 nbytes = (size_t)size;
938 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000939 PyErr_SetString(PyExc_OverflowError,
940 "repeated string is too long");
941 return NULL;
942 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000943 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000944 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000945 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000947 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000948 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000949 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000950 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000951 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000952 memset(op->ob_sval, a->ob_sval[0] , n);
953 return (PyObject *) op;
954 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000955 i = 0;
956 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000957 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
958 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000959 }
960 while (i < size) {
961 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000962 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000963 i += j;
964 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966}
967
968/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
969
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000970static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000971string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000972 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +0000973 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974{
975 if (i < 0)
976 i = 0;
977 if (j < 0)
978 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000979 if (j > Py_Size(a))
980 j = Py_Size(a);
981 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000982 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983 Py_INCREF(a);
984 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985 }
986 if (j < i)
987 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000988 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989}
990
Guido van Rossum9284a572000-03-07 15:53:43 +0000991static int
Thomas Wouters477c8d52006-05-27 19:21:47 +0000992string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +0000993{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000994 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000995 if (PyUnicode_Check(sub_obj))
996 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000997 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000998 PyErr_Format(PyExc_TypeError,
999 "'in <string>' requires string as left operand, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001000 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001001 return -1;
1002 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001003 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001004
Thomas Wouters477c8d52006-05-27 19:21:47 +00001005 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001006}
1007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001009string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001011 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001012 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001013 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001014 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001015 return NULL;
1016 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001017 pchar = a->ob_sval[i];
1018 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001019 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001020 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001021 else {
1022#ifdef COUNT_ALLOCS
1023 one_strings++;
1024#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001025 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001026 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001027 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028}
1029
Martin v. Löwiscd353062001-05-24 16:56:35 +00001030static PyObject*
1031string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001032{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001033 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 Py_ssize_t len_a, len_b;
1035 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001036 PyObject *result;
1037
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001038 /* Make sure both arguments are strings. */
1039 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001040 result = Py_NotImplemented;
1041 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001042 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001043 if (a == b) {
1044 switch (op) {
1045 case Py_EQ:case Py_LE:case Py_GE:
1046 result = Py_True;
1047 goto out;
1048 case Py_NE:case Py_LT:case Py_GT:
1049 result = Py_False;
1050 goto out;
1051 }
1052 }
1053 if (op == Py_EQ) {
1054 /* Supporting Py_NE here as well does not save
1055 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001056 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001057 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001058 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001059 result = Py_True;
1060 } else {
1061 result = Py_False;
1062 }
1063 goto out;
1064 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001065 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001066 min_len = (len_a < len_b) ? len_a : len_b;
1067 if (min_len > 0) {
1068 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1069 if (c==0)
1070 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001071 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001072 c = 0;
1073 if (c == 0)
1074 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1075 switch (op) {
1076 case Py_LT: c = c < 0; break;
1077 case Py_LE: c = c <= 0; break;
1078 case Py_EQ: assert(0); break; /* unreachable */
1079 case Py_NE: c = c != 0; break;
1080 case Py_GT: c = c > 0; break;
1081 case Py_GE: c = c >= 0; break;
1082 default:
1083 result = Py_NotImplemented;
1084 goto out;
1085 }
1086 result = c ? Py_True : Py_False;
1087 out:
1088 Py_INCREF(result);
1089 return result;
1090}
1091
1092int
1093_PyString_Eq(PyObject *o1, PyObject *o2)
1094{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001095 PyStringObject *a = (PyStringObject*) o1;
1096 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001097 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001098 && *a->ob_sval == *b->ob_sval
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001099 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001100}
1101
Guido van Rossum9bfef441993-03-29 10:43:31 +00001102static long
Fred Drakeba096332000-07-09 07:04:36 +00001103string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001104{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001105 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001106 register unsigned char *p;
1107 register long x;
1108
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001109 if (a->ob_shash != -1)
1110 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001111 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001112 p = (unsigned char *) a->ob_sval;
1113 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001114 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001115 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001116 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001117 if (x == -1)
1118 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001119 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001120 return x;
1121}
1122
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001123static PyObject*
1124string_subscript(PyStringObject* self, PyObject* item)
1125{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001126 if (PyIndex_Check(item)) {
1127 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001128 if (i == -1 && PyErr_Occurred())
1129 return NULL;
1130 if (i < 0)
1131 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001132 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001133 }
1134 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001135 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001136 char* source_buf;
1137 char* result_buf;
1138 PyObject* result;
1139
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001140 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001141 PyString_GET_SIZE(self),
1142 &start, &stop, &step, &slicelength) < 0) {
1143 return NULL;
1144 }
1145
1146 if (slicelength <= 0) {
1147 return PyString_FromStringAndSize("", 0);
1148 }
1149 else {
1150 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001151 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001152 if (result_buf == NULL)
1153 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001154
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001155 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001156 cur += step, i++) {
1157 result_buf[i] = source_buf[cur];
1158 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001159
1160 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001161 slicelength);
1162 PyMem_Free(result_buf);
1163 return result;
1164 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001165 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001166 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001167 PyErr_Format(PyExc_TypeError,
1168 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001169 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001170 return NULL;
1171 }
1172}
1173
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001174static int
1175string_buffer_getbuffer(PyStringObject *self, PyBuffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001176{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001177 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +00001178}
1179
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001180static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001181 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001182 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001183 (ssizeargfunc)string_repeat, /*sq_repeat*/
1184 (ssizeargfunc)string_item, /*sq_item*/
1185 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001186 0, /*sq_ass_item*/
1187 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001188 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001189};
1190
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001191static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001192 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001193 (binaryfunc)string_subscript,
1194 0,
1195};
1196
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001197static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001198 (getbufferproc)string_buffer_getbuffer,
1199 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001200};
1201
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001202
/* Which end(s) of the string a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the constants above. */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* Method name for strip kind i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001211
Thomas Wouters477c8d52006-05-27 19:21:47 +00001212
/* Nonzero if the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly, then memcmp covers the bytes in between.

   Don't call if length < 2 (the memcmp length would be negative).

   Every argument is parenthesized so that expression arguments such as
   `buf + 1` or `i + k` expand correctly; arguments are evaluated more than
   once, so they must not have side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))
1218
1219
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for
   human text (roughly 11 words per line) and field-delimited data (usually
   1-10 fields).  For large strings the split algorithms are bandwidth
   limited, so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit` splits:
   maxsplit+1 (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so expression arguments expand correctly;
   it is evaluated twice, so it must not have side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1232
/* Append the substring data[left:right] to `list`.
   Relies on the expansion site providing the variables `str` and `list`
   and an `onError` label, which is jumped to if creating the string or
   appending it fails.  The temporary reference held in `str` is released
   on both the success and the failure path of the append.
   Not wrapped in a block: expands to several statements. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) == 0) {                        \
        Py_DECREF(str);                                         \
    }                                                           \
    else {                                                      \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }
1244
/* Add the substring data[left:right] as element number `count` of `list`
   and increment `count`.
   Relies on the expansion site providing `str`, `list`, `count` and an
   `onError` label (taken when creating the string or appending fails).
   While count is below MAX_PREALLOC the string is stored straight into
   slot `count` with PyList_SET_ITEM and `str` is not released; beyond that
   it is appended and the temporary reference in `str` is released. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str) == 0) {                    \
            Py_DECREF(str);                                     \
        }                                                       \
        else {                                                  \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
    }                                                           \
    else {                                                      \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001261
/* Always force the list to the expected size: overwrite the size of `list`
   with `count`, a variable taken from the expansion site. */
#define FIX_PREALLOC_SIZE(list) (Py_Size(list) = count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001264
/* Cursor helpers for the whitespace split routines.  Each one moves the
   index variable `i` over `s` in place:
     SKIP_SPACE     forward while i < len and s[i] is whitespace
     SKIP_NONSPACE  forward while i < len and s[i] is not whitespace
     RSKIP_SPACE    backward while i >= 0 and s[i] is whitespace
     RSKIP_NONSPACE backward while i >= 0 and s[i] is not whitespace
   `i` must be a modifiable lvalue.  All arguments are parenthesized so
   that expression arguments (e.g. `buf + off` for s) expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1269
1270Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001271split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001272{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001273 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001274 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001275 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001276
1277 if (list == NULL)
1278 return NULL;
1279
Thomas Wouters477c8d52006-05-27 19:21:47 +00001280 i = j = 0;
1281
1282 while (maxsplit-- > 0) {
1283 SKIP_SPACE(s, i, len);
1284 if (i==len) break;
1285 j = i; i++;
1286 SKIP_NONSPACE(s, i, len);
1287 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001289
1290 if (i < len) {
1291 /* Only occurs when maxsplit was reached */
1292 /* Skip any remaining whitespace and copy to end of string */
1293 SKIP_SPACE(s, i, len);
1294 if (i != len)
1295 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001297 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001298 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001299 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001300 Py_DECREF(list);
1301 return NULL;
1302}
1303
Thomas Wouters477c8d52006-05-27 19:21:47 +00001304Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001305split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001306{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001307 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001308 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001309 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001310
1311 if (list == NULL)
1312 return NULL;
1313
Thomas Wouters477c8d52006-05-27 19:21:47 +00001314 i = j = 0;
1315 while ((j < len) && (maxcount-- > 0)) {
1316 for(; j<len; j++) {
1317 /* I found that using memchr makes no difference */
1318 if (s[j] == ch) {
1319 SPLIT_ADD(s, i, j);
1320 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001321 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001322 }
1323 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001324 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001325 if (i <= len) {
1326 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001327 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001328 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001329 return list;
1330
1331 onError:
1332 Py_DECREF(list);
1333 return NULL;
1334}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001336PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337"S.split([sep [,maxsplit]]) -> list of strings\n\
1338\n\
1339Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001340delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001341splits are done. If sep is not specified or is None, any\n\
1342whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001343
1344static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001345string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001347 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001348 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001350 PyObject *list, *str, *subobj = Py_None;
1351#ifdef USE_FAST
1352 Py_ssize_t pos;
1353#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001355 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001356 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001357 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001358 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001360 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001361 if (PyString_Check(subobj)) {
1362 sub = PyString_AS_STRING(subobj);
1363 n = PyString_GET_SIZE(subobj);
1364 }
1365 else if (PyUnicode_Check(subobj))
1366 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1367 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1368 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001369
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001370 if (n == 0) {
1371 PyErr_SetString(PyExc_ValueError, "empty separator");
1372 return NULL;
1373 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001374 else if (n == 1)
1375 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376
Thomas Wouters477c8d52006-05-27 19:21:47 +00001377 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378 if (list == NULL)
1379 return NULL;
1380
Thomas Wouters477c8d52006-05-27 19:21:47 +00001381#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001382 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001383 while (maxsplit-- > 0) {
1384 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1385 if (pos < 0)
1386 break;
1387 j = i+pos;
1388 SPLIT_ADD(s, i, j);
1389 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001390 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001391#else
1392 i = j = 0;
1393 while ((j+n <= len) && (maxsplit-- > 0)) {
1394 for (; j+n <= len; j++) {
1395 if (Py_STRING_MATCH(s, j, sub, n)) {
1396 SPLIT_ADD(s, i, j);
1397 i = j = j + n;
1398 break;
1399 }
1400 }
1401 }
1402#endif
1403 SPLIT_ADD(s, i, len);
1404 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001405 return list;
1406
Thomas Wouters477c8d52006-05-27 19:21:47 +00001407 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408 Py_DECREF(list);
1409 return NULL;
1410}
1411
Thomas Wouters477c8d52006-05-27 19:21:47 +00001412PyDoc_STRVAR(partition__doc__,
1413"S.partition(sep) -> (head, sep, tail)\n\
1414\n\
1415Searches for the separator sep in S, and returns the part before it,\n\
1416the separator itself, and the part after it. If the separator is not\n\
1417found, returns S and two empty strings.");
1418
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001419static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001420string_partition(PyStringObject *self, PyObject *sep_obj)
1421{
1422 const char *sep;
1423 Py_ssize_t sep_len;
1424
1425 if (PyString_Check(sep_obj)) {
1426 sep = PyString_AS_STRING(sep_obj);
1427 sep_len = PyString_GET_SIZE(sep_obj);
1428 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001429 else if (PyUnicode_Check(sep_obj))
1430 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001431 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1432 return NULL;
1433
1434 return stringlib_partition(
1435 (PyObject*) self,
1436 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1437 sep_obj, sep, sep_len
1438 );
1439}
1440
1441PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001442"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001443\n\
1444Searches for the separator sep in S, starting at the end of S, and returns\n\
1445the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001446separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001447
1448static PyObject *
1449string_rpartition(PyStringObject *self, PyObject *sep_obj)
1450{
1451 const char *sep;
1452 Py_ssize_t sep_len;
1453
1454 if (PyString_Check(sep_obj)) {
1455 sep = PyString_AS_STRING(sep_obj);
1456 sep_len = PyString_GET_SIZE(sep_obj);
1457 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001458 else if (PyUnicode_Check(sep_obj))
1459 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001460 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1461 return NULL;
1462
1463 return stringlib_rpartition(
1464 (PyObject*) self,
1465 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1466 sep_obj, sep, sep_len
1467 );
1468}
1469
1470Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001471rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001472{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001473 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001474 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001475 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001476
1477 if (list == NULL)
1478 return NULL;
1479
Thomas Wouters477c8d52006-05-27 19:21:47 +00001480 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001481
Thomas Wouters477c8d52006-05-27 19:21:47 +00001482 while (maxsplit-- > 0) {
1483 RSKIP_SPACE(s, i);
1484 if (i<0) break;
1485 j = i; i--;
1486 RSKIP_NONSPACE(s, i);
1487 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001488 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001489 if (i >= 0) {
1490 /* Only occurs when maxsplit was reached */
1491 /* Skip any remaining whitespace and copy to beginning of string */
1492 RSKIP_SPACE(s, i);
1493 if (i >= 0)
1494 SPLIT_ADD(s, 0, i + 1);
1495
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001496 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001497 FIX_PREALLOC_SIZE(list);
1498 if (PyList_Reverse(list) < 0)
1499 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001500 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001501 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001502 Py_DECREF(list);
1503 return NULL;
1504}
1505
Thomas Wouters477c8d52006-05-27 19:21:47 +00001506Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001507rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001508{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001509 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001510 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001511 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001512
1513 if (list == NULL)
1514 return NULL;
1515
Thomas Wouters477c8d52006-05-27 19:21:47 +00001516 i = j = len - 1;
1517 while ((i >= 0) && (maxcount-- > 0)) {
1518 for (; i >= 0; i--) {
1519 if (s[i] == ch) {
1520 SPLIT_ADD(s, i + 1, j + 1);
1521 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001522 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001523 }
1524 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001525 }
1526 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001527 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001528 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001529 FIX_PREALLOC_SIZE(list);
1530 if (PyList_Reverse(list) < 0)
1531 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001532 return list;
1533
1534 onError:
1535 Py_DECREF(list);
1536 return NULL;
1537}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001538
1539PyDoc_STRVAR(rsplit__doc__,
1540"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1541\n\
1542Return a list of the words in the string S, using sep as the\n\
1543delimiter string, starting at the end of the string and working\n\
1544to the front. If maxsplit is given, at most maxsplit splits are\n\
1545done. If sep is not specified or is None, any whitespace string\n\
1546is a separator.");
1547
1548static PyObject *
1549string_rsplit(PyStringObject *self, PyObject *args)
1550{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001551 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001552 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001553 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001554 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001555
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001556 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001557 return NULL;
1558 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001559 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001560 if (subobj == Py_None)
1561 return rsplit_whitespace(s, len, maxsplit);
1562 if (PyString_Check(subobj)) {
1563 sub = PyString_AS_STRING(subobj);
1564 n = PyString_GET_SIZE(subobj);
1565 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001566 else if (PyUnicode_Check(subobj))
1567 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001568 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1569 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001570
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001571 if (n == 0) {
1572 PyErr_SetString(PyExc_ValueError, "empty separator");
1573 return NULL;
1574 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001575 else if (n == 1)
1576 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001577
Thomas Wouters477c8d52006-05-27 19:21:47 +00001578 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001579 if (list == NULL)
1580 return NULL;
1581
1582 j = len;
1583 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001584
Thomas Wouters477c8d52006-05-27 19:21:47 +00001585 while ( (i >= 0) && (maxsplit-- > 0) ) {
1586 for (; i>=0; i--) {
1587 if (Py_STRING_MATCH(s, i, sub, n)) {
1588 SPLIT_ADD(s, i + n, j);
1589 j = i;
1590 i -= n;
1591 break;
1592 }
1593 }
1594 }
1595 SPLIT_ADD(s, 0, j);
1596 FIX_PREALLOC_SIZE(list);
1597 if (PyList_Reverse(list) < 0)
1598 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001599 return list;
1600
Thomas Wouters477c8d52006-05-27 19:21:47 +00001601onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001602 Py_DECREF(list);
1603 return NULL;
1604}
1605
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001606
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001607PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608"S.join(sequence) -> string\n\
1609\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001610Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001611sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612
1613static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001614string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615{
1616 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001617 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001620 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001621 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001622 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001623 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624
Tim Peters19fe14e2001-01-19 03:03:47 +00001625 seq = PySequence_Fast(orig, "");
1626 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001627 return NULL;
1628 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001629
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001630 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001631 if (seqlen == 0) {
1632 Py_DECREF(seq);
1633 return PyString_FromString("");
1634 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001636 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001637 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1638 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001639 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001640 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001641 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001642 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001643
Raymond Hettinger674f2412004-08-23 23:23:54 +00001644 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001645 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001646 * Do a pre-pass to figure out the total amount of space we'll
1647 * need (sz), see whether any argument is absurd, and defer to
1648 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001649 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001650 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001651 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001652 item = PySequence_Fast_GET_ITEM(seq, i);
1653 if (!PyString_Check(item)){
1654 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001655 /* Defer to Unicode join.
1656 * CAUTION: There's no gurantee that the
1657 * original sequence can be iterated over
1658 * again, so we must pass seq here.
1659 */
1660 PyObject *result;
1661 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001662 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001663 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001664 }
1665 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001666 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001667 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001668 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001669 Py_DECREF(seq);
1670 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001671 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001672 sz += PyString_GET_SIZE(item);
1673 if (i != 0)
1674 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001675 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001676 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001677 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001678 Py_DECREF(seq);
1679 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001680 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001681 }
1682
1683 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001684 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001685 if (res == NULL) {
1686 Py_DECREF(seq);
1687 return NULL;
1688 }
1689
1690 /* Catenate everything. */
1691 p = PyString_AS_STRING(res);
1692 for (i = 0; i < seqlen; ++i) {
1693 size_t n;
1694 item = PySequence_Fast_GET_ITEM(seq, i);
1695 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001696 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001697 p += n;
1698 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001699 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001700 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001701 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001702 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001703
Jeremy Hylton49048292000-07-11 03:28:17 +00001704 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706}
1707
Tim Peters52e155e2001-06-16 05:42:57 +00001708PyObject *
1709_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001710{
Tim Petersa7259592001-06-16 05:11:17 +00001711 assert(sep != NULL && PyString_Check(sep));
1712 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001713 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001714}
1715
Thomas Wouters477c8d52006-05-27 19:21:47 +00001716Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001717string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001718{
1719 if (*end > len)
1720 *end = len;
1721 else if (*end < 0)
1722 *end += len;
1723 if (*end < 0)
1724 *end = 0;
1725 if (*start < 0)
1726 *start += len;
1727 if (*start < 0)
1728 *start = 0;
1729}
1730
Thomas Wouters477c8d52006-05-27 19:21:47 +00001731Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001732string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001734 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001735 const char *sub;
1736 Py_ssize_t sub_len;
1737 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738
Thomas Wouters477c8d52006-05-27 19:21:47 +00001739 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1740 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001741 return -2;
1742 if (PyString_Check(subobj)) {
1743 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001744 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745 }
1746 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001747 return PyUnicode_Find(
1748 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001749 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001750 /* XXX - the "expected a character buffer object" is pretty
1751 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752 return -2;
1753
Thomas Wouters477c8d52006-05-27 19:21:47 +00001754 if (dir > 0)
1755 return stringlib_find_slice(
1756 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1757 sub, sub_len, start, end);
1758 else
1759 return stringlib_rfind_slice(
1760 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1761 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762}
1763
1764
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001765PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766"S.find(sub [,start [,end]]) -> int\n\
1767\n\
1768Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001769such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770arguments start and end are interpreted as in slice notation.\n\
1771\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001772Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773
1774static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001775string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001777 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778 if (result == -2)
1779 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001780 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781}
1782
1783
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001784PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785"S.index(sub [,start [,end]]) -> int\n\
1786\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001787Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788
1789static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001790string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001792 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793 if (result == -2)
1794 return NULL;
1795 if (result == -1) {
1796 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001797 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798 return NULL;
1799 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001800 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801}
1802
1803
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001804PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805"S.rfind(sub [,start [,end]]) -> int\n\
1806\n\
1807Return the highest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001808such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809arguments start and end are interpreted as in slice notation.\n\
1810\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001811Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812
1813static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001814string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001816 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817 if (result == -2)
1818 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001819 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820}
1821
1822
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001823PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824"S.rindex(sub [,start [,end]]) -> int\n\
1825\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001826Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827
1828static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001829string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001831 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832 if (result == -2)
1833 return NULL;
1834 if (result == -1) {
1835 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001836 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001837 return NULL;
1838 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001839 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840}
1841
1842
Thomas Wouters477c8d52006-05-27 19:21:47 +00001843Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001844do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1845{
1846 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001847 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001848 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001849 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1850 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001851
1852 i = 0;
1853 if (striptype != RIGHTSTRIP) {
1854 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1855 i++;
1856 }
1857 }
1858
1859 j = len;
1860 if (striptype != LEFTSTRIP) {
1861 do {
1862 j--;
1863 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1864 j++;
1865 }
1866
1867 if (i == 0 && j == len && PyString_CheckExact(self)) {
1868 Py_INCREF(self);
1869 return (PyObject*)self;
1870 }
1871 else
1872 return PyString_FromStringAndSize(s+i, j-i);
1873}
1874
1875
Thomas Wouters477c8d52006-05-27 19:21:47 +00001876Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001877do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878{
1879 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001880 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001881
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882 i = 0;
1883 if (striptype != RIGHTSTRIP) {
1884 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1885 i++;
1886 }
1887 }
1888
1889 j = len;
1890 if (striptype != LEFTSTRIP) {
1891 do {
1892 j--;
1893 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1894 j++;
1895 }
1896
Tim Peters8fa5dd02001-09-12 02:18:30 +00001897 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898 Py_INCREF(self);
1899 return (PyObject*)self;
1900 }
1901 else
1902 return PyString_FromStringAndSize(s+i, j-i);
1903}
1904
1905
Thomas Wouters477c8d52006-05-27 19:21:47 +00001906Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001907do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1908{
1909 PyObject *sep = NULL;
1910
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001911 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001912 return NULL;
1913
1914 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001915 if (PyString_Check(sep))
1916 return do_xstrip(self, striptype, sep);
1917 else if (PyUnicode_Check(sep)) {
1918 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1919 PyObject *res;
1920 if (uniself==NULL)
1921 return NULL;
1922 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1923 striptype, sep);
1924 Py_DECREF(uniself);
1925 return res;
1926 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001927 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001928 "%s arg must be None or string",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001929 STRIPNAME(striptype));
1930 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001931 }
1932
1933 return do_strip(self, striptype);
1934}
1935
1936
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001937PyDoc_STRVAR(strip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001938"S.strip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939\n\
1940Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001941whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001942If chars is given and not None, remove characters in chars instead.\n\
1943If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944
1945static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001946string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001948 if (PyTuple_GET_SIZE(args) == 0)
1949 return do_strip(self, BOTHSTRIP); /* Common case */
1950 else
1951 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952}
1953
1954
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001955PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001956"S.lstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001958Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001959If chars is given and not None, remove characters in chars instead.\n\
1960If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961
1962static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001963string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001965 if (PyTuple_GET_SIZE(args) == 0)
1966 return do_strip(self, LEFTSTRIP); /* Common case */
1967 else
1968 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969}
1970
1971
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001972PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001973"S.rstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001975Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001976If chars is given and not None, remove characters in chars instead.\n\
1977If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978
1979static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001980string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001982 if (PyTuple_GET_SIZE(args) == 0)
1983 return do_strip(self, RIGHTSTRIP); /* Common case */
1984 else
1985 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986}
1987
1988
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001989PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990"S.lower() -> string\n\
1991\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001992Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993
Thomas Wouters477c8d52006-05-27 19:21:47 +00001994/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1995#ifndef _tolower
1996#define _tolower tolower
1997#endif
1998
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002000string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002002 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002003 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002004 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002006 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002007 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002009
2010 s = PyString_AS_STRING(newobj);
2011
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002012 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002013
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002015 int c = Py_CHARMASK(s[i]);
2016 if (isupper(c))
2017 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002019
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002020 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021}
2022
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002023PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024"S.upper() -> string\n\
2025\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002026Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027
Thomas Wouters477c8d52006-05-27 19:21:47 +00002028#ifndef _toupper
2029#define _toupper toupper
2030#endif
2031
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002033string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002035 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002036 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002037 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002039 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002040 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002042
2043 s = PyString_AS_STRING(newobj);
2044
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002045 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002046
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002048 int c = Py_CHARMASK(s[i]);
2049 if (islower(c))
2050 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002052
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002053 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054}
2055
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002056PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002057"S.title() -> string\n\
2058\n\
2059Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002060characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061
2062static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002063string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002064{
2065 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002066 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002067 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002068 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002070 newobj = PyString_FromStringAndSize(NULL, n);
2071 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002072 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002073 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002074 for (i = 0; i < n; i++) {
2075 int c = Py_CHARMASK(*s++);
2076 if (islower(c)) {
2077 if (!previous_is_cased)
2078 c = toupper(c);
2079 previous_is_cased = 1;
2080 } else if (isupper(c)) {
2081 if (previous_is_cased)
2082 c = tolower(c);
2083 previous_is_cased = 1;
2084 } else
2085 previous_is_cased = 0;
2086 *s_new++ = c;
2087 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002088 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002089}
2090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002091PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092"S.capitalize() -> string\n\
2093\n\
2094Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002095capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096
2097static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002098string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099{
2100 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002101 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002102 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002104 newobj = PyString_FromStringAndSize(NULL, n);
2105 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002107 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108 if (0 < n) {
2109 int c = Py_CHARMASK(*s++);
2110 if (islower(c))
2111 *s_new = toupper(c);
2112 else
2113 *s_new = c;
2114 s_new++;
2115 }
2116 for (i = 1; i < n; i++) {
2117 int c = Py_CHARMASK(*s++);
2118 if (isupper(c))
2119 *s_new = tolower(c);
2120 else
2121 *s_new = c;
2122 s_new++;
2123 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002124 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125}
2126
2127
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002128PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129"S.count(sub[, start[, end]]) -> int\n\
2130\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002131Return the number of non-overlapping occurrences of substring sub in\n\
2132string S[start:end]. Optional arguments start and end are interpreted\n\
2133as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134
2135static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002136string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002138 PyObject *sub_obj;
2139 const char *str = PyString_AS_STRING(self), *sub;
2140 Py_ssize_t sub_len;
2141 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142
Thomas Wouters477c8d52006-05-27 19:21:47 +00002143 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2144 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002146
Thomas Wouters477c8d52006-05-27 19:21:47 +00002147 if (PyString_Check(sub_obj)) {
2148 sub = PyString_AS_STRING(sub_obj);
2149 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002150 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002151 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002152 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002153 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002154 if (count == -1)
2155 return NULL;
2156 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002157 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002158 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002159 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002160 return NULL;
2161
Thomas Wouters477c8d52006-05-27 19:21:47 +00002162 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002163
Thomas Wouters477c8d52006-05-27 19:21:47 +00002164 return PyInt_FromSsize_t(
2165 stringlib_count(str + start, end - start, sub, sub_len)
2166 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167}
2168
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002169PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170"S.swapcase() -> string\n\
2171\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002172Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002173converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174
2175static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002176string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177{
2178 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002179 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002180 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002182 newobj = PyString_FromStringAndSize(NULL, n);
2183 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002185 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186 for (i = 0; i < n; i++) {
2187 int c = Py_CHARMASK(*s++);
2188 if (islower(c)) {
2189 *s_new = toupper(c);
2190 }
2191 else if (isupper(c)) {
2192 *s_new = tolower(c);
2193 }
2194 else
2195 *s_new = c;
2196 s_new++;
2197 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002198 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199}
2200
2201
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002202PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203"S.translate(table [,deletechars]) -> string\n\
2204\n\
2205Return a copy of the string S, where all characters occurring\n\
2206in the optional argument deletechars are removed, and the\n\
2207remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002208translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209
2210static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002211string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002213 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002214 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002215 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002216 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002217 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002218 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219 PyObject *result;
2220 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002221 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002223 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002224 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226
2227 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002228 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002229 tablen = PyString_GET_SIZE(tableobj);
2230 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002231 else if (tableobj == Py_None) {
2232 table = NULL;
2233 tablen = 256;
2234 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002235 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002236 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002237 parameter; instead a mapping to None will cause characters
2238 to be deleted. */
2239 if (delobj != NULL) {
2240 PyErr_SetString(PyExc_TypeError,
2241 "deletions are implemented differently for unicode");
2242 return NULL;
2243 }
2244 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2245 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002246 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002248
Martin v. Löwis00b61272002-12-12 20:03:19 +00002249 if (tablen != 256) {
2250 PyErr_SetString(PyExc_ValueError,
2251 "translation table must be 256 characters long");
2252 return NULL;
2253 }
2254
Guido van Rossum4c08d552000-03-10 22:55:18 +00002255 if (delobj != NULL) {
2256 if (PyString_Check(delobj)) {
2257 del_table = PyString_AS_STRING(delobj);
2258 dellen = PyString_GET_SIZE(delobj);
2259 }
2260 else if (PyUnicode_Check(delobj)) {
2261 PyErr_SetString(PyExc_TypeError,
2262 "deletions are implemented differently for unicode");
2263 return NULL;
2264 }
2265 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2266 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002267 }
2268 else {
2269 del_table = NULL;
2270 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271 }
2272
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002273 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274 result = PyString_FromStringAndSize((char *)NULL, inlen);
2275 if (result == NULL)
2276 return NULL;
2277 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002278 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002279
Guido van Rossumd8faa362007-04-27 19:54:29 +00002280 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281 /* If no deletions are required, use faster code */
2282 for (i = inlen; --i >= 0; ) {
2283 c = Py_CHARMASK(*input++);
2284 if (Py_CHARMASK((*output++ = table[c])) != c)
2285 changed = 1;
2286 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002287 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002288 return result;
2289 Py_DECREF(result);
2290 Py_INCREF(input_obj);
2291 return input_obj;
2292 }
2293
Guido van Rossumd8faa362007-04-27 19:54:29 +00002294 if (table == NULL) {
2295 for (i = 0; i < 256; i++)
2296 trans_table[i] = Py_CHARMASK(i);
2297 } else {
2298 for (i = 0; i < 256; i++)
2299 trans_table[i] = Py_CHARMASK(table[i]);
2300 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301
2302 for (i = 0; i < dellen; i++)
2303 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2304
2305 for (i = inlen; --i >= 0; ) {
2306 c = Py_CHARMASK(*input++);
2307 if (trans_table[c] != -1)
2308 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2309 continue;
2310 changed = 1;
2311 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002312 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002313 Py_DECREF(result);
2314 Py_INCREF(input_obj);
2315 return input_obj;
2316 }
2317 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002318 if (inlen > 0)
2319 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320 return result;
2321}
2322
2323
/* Search directions passed as the `direction` argument of findstring()
   and countstring(). */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Look for byte `c` in the `target_len` bytes starting at `target`.
   Evaluates to a char * to the first match, or NULL when memchr()
   finds none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331
Thomas Wouters477c8d52006-05-27 19:21:47 +00002332/* String ops must return a string. */
2333/* If the object is subclass of string, create a copy */
2334Py_LOCAL(PyStringObject *)
2335return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002337 if (PyString_CheckExact(self)) {
2338 Py_INCREF(self);
2339 return self;
2340 }
2341 return (PyStringObject *)PyString_FromStringAndSize(
2342 PyString_AS_STRING(self),
2343 PyString_GET_SIZE(self));
2344}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002345
Thomas Wouters477c8d52006-05-27 19:21:47 +00002346Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002347countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002348{
2349 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002350 const char *start=target;
2351 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352
Thomas Wouters477c8d52006-05-27 19:21:47 +00002353 while ( (start=findchar(start, end-start, c)) != NULL ) {
2354 count++;
2355 if (count >= maxcount)
2356 break;
2357 start += 1;
2358 }
2359 return count;
2360}
2361
2362Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002363findstring(const char *target, Py_ssize_t target_len,
2364 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002365 Py_ssize_t start,
2366 Py_ssize_t end,
2367 int direction)
2368{
2369 if (start < 0) {
2370 start += target_len;
2371 if (start < 0)
2372 start = 0;
2373 }
2374 if (end > target_len) {
2375 end = target_len;
2376 } else if (end < 0) {
2377 end += target_len;
2378 if (end < 0)
2379 end = 0;
2380 }
2381
2382 /* zero-length substrings always match at the first attempt */
2383 if (pattern_len == 0)
2384 return (direction > 0) ? start : end;
2385
2386 end -= pattern_len;
2387
2388 if (direction < 0) {
2389 for (; end >= start; end--)
2390 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2391 return end;
2392 } else {
2393 for (; start <= end; start++)
2394 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2395 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002396 }
2397 return -1;
2398}
2399
Thomas Wouters477c8d52006-05-27 19:21:47 +00002400Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002401countstring(const char *target, Py_ssize_t target_len,
2402 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002403 Py_ssize_t start,
2404 Py_ssize_t end,
2405 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002406{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002407 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408
Thomas Wouters477c8d52006-05-27 19:21:47 +00002409 if (start < 0) {
2410 start += target_len;
2411 if (start < 0)
2412 start = 0;
2413 }
2414 if (end > target_len) {
2415 end = target_len;
2416 } else if (end < 0) {
2417 end += target_len;
2418 if (end < 0)
2419 end = 0;
2420 }
2421
2422 /* zero-length substrings match everywhere */
2423 if (pattern_len == 0 || maxcount == 0) {
2424 if (target_len+1 < maxcount)
2425 return target_len+1;
2426 return maxcount;
2427 }
2428
2429 end -= pattern_len;
2430 if (direction < 0) {
2431 for (; (end >= start); end--)
2432 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2433 count++;
2434 if (--maxcount <= 0) break;
2435 end -= pattern_len-1;
2436 }
2437 } else {
2438 for (; (start <= end); start++)
2439 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2440 count++;
2441 if (--maxcount <= 0)
2442 break;
2443 start += pattern_len-1;
2444 }
2445 }
2446 return count;
2447}
2448
2449
2450/* Algorithms for different cases of string replacement */
2451
2452/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2453Py_LOCAL(PyStringObject *)
2454replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002455 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002456 Py_ssize_t maxcount)
2457{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002458 char *self_s, *result_s;
2459 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002460 Py_ssize_t count, i, product;
2461 PyStringObject *result;
2462
2463 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002464
Thomas Wouters477c8d52006-05-27 19:21:47 +00002465 /* 1 at the end plus 1 after every character */
2466 count = self_len+1;
2467 if (maxcount < count)
2468 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002469
Thomas Wouters477c8d52006-05-27 19:21:47 +00002470 /* Check for overflow */
2471 /* result_len = count * to_len + self_len; */
2472 product = count * to_len;
2473 if (product / to_len != count) {
2474 PyErr_SetString(PyExc_OverflowError,
2475 "replace string is too long");
2476 return NULL;
2477 }
2478 result_len = product + self_len;
2479 if (result_len < 0) {
2480 PyErr_SetString(PyExc_OverflowError,
2481 "replace string is too long");
2482 return NULL;
2483 }
2484
2485 if (! (result = (PyStringObject *)
2486 PyString_FromStringAndSize(NULL, result_len)) )
2487 return NULL;
2488
2489 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002490 result_s = PyString_AS_STRING(result);
2491
2492 /* TODO: special case single character, which doesn't need memcpy */
2493
2494 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002495 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002496 result_s += to_len;
2497 count -= 1;
2498
2499 for (i=0; i<count; i++) {
2500 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002501 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002502 result_s += to_len;
2503 }
2504
2505 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002506 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002507
2508 return result;
2509}
2510
2511/* Special case for deleting a single character */
2512/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2513Py_LOCAL(PyStringObject *)
2514replace_delete_single_character(PyStringObject *self,
2515 char from_c, Py_ssize_t maxcount)
2516{
2517 char *self_s, *result_s;
2518 char *start, *next, *end;
2519 Py_ssize_t self_len, result_len;
2520 Py_ssize_t count;
2521 PyStringObject *result;
2522
2523 self_len = PyString_GET_SIZE(self);
2524 self_s = PyString_AS_STRING(self);
2525
2526 count = countchar(self_s, self_len, from_c, maxcount);
2527 if (count == 0) {
2528 return return_self(self);
2529 }
2530
2531 result_len = self_len - count; /* from_len == 1 */
2532 assert(result_len>=0);
2533
2534 if ( (result = (PyStringObject *)
2535 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2536 return NULL;
2537 result_s = PyString_AS_STRING(result);
2538
2539 start = self_s;
2540 end = self_s + self_len;
2541 while (count-- > 0) {
2542 next = findchar(start, end-start, from_c);
2543 if (next == NULL)
2544 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002545 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002546 result_s += (next-start);
2547 start = next+1;
2548 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002549 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002550
Thomas Wouters477c8d52006-05-27 19:21:47 +00002551 return result;
2552}
2553
2554/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2555
2556Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002557replace_delete_substring(PyStringObject *self,
2558 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002559 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002560 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002561 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002562 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002563 Py_ssize_t count, offset;
2564 PyStringObject *result;
2565
2566 self_len = PyString_GET_SIZE(self);
2567 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002568
2569 count = countstring(self_s, self_len,
2570 from_s, from_len,
2571 0, self_len, 1,
2572 maxcount);
2573
2574 if (count == 0) {
2575 /* no matches */
2576 return return_self(self);
2577 }
2578
2579 result_len = self_len - (count * from_len);
2580 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002581
Thomas Wouters477c8d52006-05-27 19:21:47 +00002582 if ( (result = (PyStringObject *)
2583 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2584 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002585
Thomas Wouters477c8d52006-05-27 19:21:47 +00002586 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002587
Thomas Wouters477c8d52006-05-27 19:21:47 +00002588 start = self_s;
2589 end = self_s + self_len;
2590 while (count-- > 0) {
2591 offset = findstring(start, end-start,
2592 from_s, from_len,
2593 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002594 if (offset == -1)
2595 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002596 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002597
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002598 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002599
Thomas Wouters477c8d52006-05-27 19:21:47 +00002600 result_s += (next-start);
2601 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002602 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002603 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002604 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002605}
2606
Thomas Wouters477c8d52006-05-27 19:21:47 +00002607/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2608Py_LOCAL(PyStringObject *)
2609replace_single_character_in_place(PyStringObject *self,
2610 char from_c, char to_c,
2611 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002612{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002613 char *self_s, *result_s, *start, *end, *next;
2614 Py_ssize_t self_len;
2615 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002616
Thomas Wouters477c8d52006-05-27 19:21:47 +00002617 /* The result string will be the same size */
2618 self_s = PyString_AS_STRING(self);
2619 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002620
Thomas Wouters477c8d52006-05-27 19:21:47 +00002621 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002622
Thomas Wouters477c8d52006-05-27 19:21:47 +00002623 if (next == NULL) {
2624 /* No matches; return the original string */
2625 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002626 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002627
Thomas Wouters477c8d52006-05-27 19:21:47 +00002628 /* Need to make a new string */
2629 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2630 if (result == NULL)
2631 return NULL;
2632 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002633 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002634
Thomas Wouters477c8d52006-05-27 19:21:47 +00002635 /* change everything in-place, starting with this one */
2636 start = result_s + (next-self_s);
2637 *start = to_c;
2638 start++;
2639 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002640
Thomas Wouters477c8d52006-05-27 19:21:47 +00002641 while (--maxcount > 0) {
2642 next = findchar(start, end-start, from_c);
2643 if (next == NULL)
2644 break;
2645 *next = to_c;
2646 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002647 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002648
Thomas Wouters477c8d52006-05-27 19:21:47 +00002649 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002650}
2651
Thomas Wouters477c8d52006-05-27 19:21:47 +00002652/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2653Py_LOCAL(PyStringObject *)
2654replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002655 const char *from_s, Py_ssize_t from_len,
2656 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002657 Py_ssize_t maxcount)
2658{
2659 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002660 char *self_s;
2661 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002662 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002663
Thomas Wouters477c8d52006-05-27 19:21:47 +00002664 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002665
Thomas Wouters477c8d52006-05-27 19:21:47 +00002666 self_s = PyString_AS_STRING(self);
2667 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002668
Thomas Wouters477c8d52006-05-27 19:21:47 +00002669 offset = findstring(self_s, self_len,
2670 from_s, from_len,
2671 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002672 if (offset == -1) {
2673 /* No matches; return the original string */
2674 return return_self(self);
2675 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002676
Thomas Wouters477c8d52006-05-27 19:21:47 +00002677 /* Need to make a new string */
2678 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2679 if (result == NULL)
2680 return NULL;
2681 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002682 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002683
Thomas Wouters477c8d52006-05-27 19:21:47 +00002684 /* change everything in-place, starting with this one */
2685 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002686 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002687 start += from_len;
2688 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002689
Thomas Wouters477c8d52006-05-27 19:21:47 +00002690 while ( --maxcount > 0) {
2691 offset = findstring(start, end-start,
2692 from_s, from_len,
2693 0, end-start, FORWARD);
2694 if (offset==-1)
2695 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002696 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002697 start += offset+from_len;
2698 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002699
Thomas Wouters477c8d52006-05-27 19:21:47 +00002700 return result;
2701}
2702
2703/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2704Py_LOCAL(PyStringObject *)
2705replace_single_character(PyStringObject *self,
2706 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002707 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002708 Py_ssize_t maxcount)
2709{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002710 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002711 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002712 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002713 Py_ssize_t count, product;
2714 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002715
Thomas Wouters477c8d52006-05-27 19:21:47 +00002716 self_s = PyString_AS_STRING(self);
2717 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002718
Thomas Wouters477c8d52006-05-27 19:21:47 +00002719 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002720 if (count == 0) {
2721 /* no matches, return unchanged */
2722 return return_self(self);
2723 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002724
Thomas Wouters477c8d52006-05-27 19:21:47 +00002725 /* use the difference between current and new, hence the "-1" */
2726 /* result_len = self_len + count * (to_len-1) */
2727 product = count * (to_len-1);
2728 if (product / (to_len-1) != count) {
2729 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2730 return NULL;
2731 }
2732 result_len = self_len + product;
2733 if (result_len < 0) {
2734 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2735 return NULL;
2736 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002737
Thomas Wouters477c8d52006-05-27 19:21:47 +00002738 if ( (result = (PyStringObject *)
2739 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2740 return NULL;
2741 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002742
Thomas Wouters477c8d52006-05-27 19:21:47 +00002743 start = self_s;
2744 end = self_s + self_len;
2745 while (count-- > 0) {
2746 next = findchar(start, end-start, from_c);
2747 if (next == NULL)
2748 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002749
Thomas Wouters477c8d52006-05-27 19:21:47 +00002750 if (next == start) {
2751 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002752 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002753 result_s += to_len;
2754 start += 1;
2755 } else {
2756 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002757 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002758 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002759 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002760 result_s += to_len;
2761 start = next+1;
2762 }
2763 }
2764 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002765 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002766
Thomas Wouters477c8d52006-05-27 19:21:47 +00002767 return result;
2768}
2769
2770/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2771Py_LOCAL(PyStringObject *)
2772replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002773 const char *from_s, Py_ssize_t from_len,
2774 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002775 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002776 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002777 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002778 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002779 Py_ssize_t count, offset, product;
2780 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002781
Thomas Wouters477c8d52006-05-27 19:21:47 +00002782 self_s = PyString_AS_STRING(self);
2783 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002784
Thomas Wouters477c8d52006-05-27 19:21:47 +00002785 count = countstring(self_s, self_len,
2786 from_s, from_len,
2787 0, self_len, FORWARD, maxcount);
2788 if (count == 0) {
2789 /* no matches, return unchanged */
2790 return return_self(self);
2791 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002792
Thomas Wouters477c8d52006-05-27 19:21:47 +00002793 /* Check for overflow */
2794 /* result_len = self_len + count * (to_len-from_len) */
2795 product = count * (to_len-from_len);
2796 if (product / (to_len-from_len) != count) {
2797 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2798 return NULL;
2799 }
2800 result_len = self_len + product;
2801 if (result_len < 0) {
2802 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2803 return NULL;
2804 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002805
Thomas Wouters477c8d52006-05-27 19:21:47 +00002806 if ( (result = (PyStringObject *)
2807 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2808 return NULL;
2809 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002810
Thomas Wouters477c8d52006-05-27 19:21:47 +00002811 start = self_s;
2812 end = self_s + self_len;
2813 while (count-- > 0) {
2814 offset = findstring(start, end-start,
2815 from_s, from_len,
2816 0, end-start, FORWARD);
2817 if (offset == -1)
2818 break;
2819 next = start+offset;
2820 if (next == start) {
2821 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002822 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002823 result_s += to_len;
2824 start += from_len;
2825 } else {
2826 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002827 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002828 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002829 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002830 result_s += to_len;
2831 start = next+from_len;
2832 }
2833 }
2834 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002835 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002836
Thomas Wouters477c8d52006-05-27 19:21:47 +00002837 return result;
2838}
2839
2840
2841Py_LOCAL(PyStringObject *)
2842replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002843 const char *from_s, Py_ssize_t from_len,
2844 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002845 Py_ssize_t maxcount)
2846{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002847 if (maxcount < 0) {
2848 maxcount = PY_SSIZE_T_MAX;
2849 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2850 /* nothing to do; return the original string */
2851 return return_self(self);
2852 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002853
Thomas Wouters477c8d52006-05-27 19:21:47 +00002854 if (maxcount == 0 ||
2855 (from_len == 0 && to_len == 0)) {
2856 /* nothing to do; return the original string */
2857 return return_self(self);
2858 }
2859
2860 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002861
Thomas Wouters477c8d52006-05-27 19:21:47 +00002862 if (from_len == 0) {
2863 /* insert the 'to' string everywhere. */
2864 /* >>> "Python".replace("", ".") */
2865 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002866 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002867 }
2868
2869 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2870 /* point for an empty self string to generate a non-empty string */
2871 /* Special case so the remaining code always gets a non-empty string */
2872 if (PyString_GET_SIZE(self) == 0) {
2873 return return_self(self);
2874 }
2875
2876 if (to_len == 0) {
2877 /* delete all occurances of 'from' string */
2878 if (from_len == 1) {
2879 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002880 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002881 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002882 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002883 }
2884 }
2885
2886 /* Handle special case where both strings have the same length */
2887
2888 if (from_len == to_len) {
2889 if (from_len == 1) {
2890 return replace_single_character_in_place(
2891 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002892 from_s[0],
2893 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002894 maxcount);
2895 } else {
2896 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002897 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002898 }
2899 }
2900
2901 /* Otherwise use the more generic algorithms */
2902 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002903 return replace_single_character(self, from_s[0],
2904 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002905 } else {
2906 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002907 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002908 }
2909}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002910
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002911PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002912"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002913\n\
2914Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002915old replaced by new. If the optional argument count is\n\
2916given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002917
2918static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002919string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002920{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002921 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002922 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002923 const char *from_s, *to_s;
2924 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002925
Thomas Wouters477c8d52006-05-27 19:21:47 +00002926 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002927 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002928
Thomas Wouters477c8d52006-05-27 19:21:47 +00002929 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002930 from_s = PyString_AS_STRING(from);
2931 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002932 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002933 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002934 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002935 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002936 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002937 return NULL;
2938
Thomas Wouters477c8d52006-05-27 19:21:47 +00002939 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002940 to_s = PyString_AS_STRING(to);
2941 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002942 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002943 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002944 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002945 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002946 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002947 return NULL;
2948
Thomas Wouters477c8d52006-05-27 19:21:47 +00002949 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002950 from_s, from_len,
2951 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002952}
2953
Thomas Wouters477c8d52006-05-27 19:21:47 +00002954/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002955
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002956/* Matches the end (direction >= 0) or start (direction < 0) of self
2957 * against substr, using the start and end arguments. Returns
2958 * -1 on error, 0 if not found and 1 if found.
2959 */
2960Py_LOCAL(int)
2961_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2962 Py_ssize_t end, int direction)
2963{
2964 Py_ssize_t len = PyString_GET_SIZE(self);
2965 Py_ssize_t slen;
2966 const char* sub;
2967 const char* str;
2968
2969 if (PyString_Check(substr)) {
2970 sub = PyString_AS_STRING(substr);
2971 slen = PyString_GET_SIZE(substr);
2972 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002973 else if (PyUnicode_Check(substr))
2974 return PyUnicode_Tailmatch((PyObject *)self,
2975 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002976 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2977 return -1;
2978 str = PyString_AS_STRING(self);
2979
2980 string_adjust_indices(&start, &end, len);
2981
2982 if (direction < 0) {
2983 /* startswith */
2984 if (start+slen > len)
2985 return 0;
2986 } else {
2987 /* endswith */
2988 if (end-start < slen || start > len)
2989 return 0;
2990
2991 if (end-slen > start)
2992 start = end - slen;
2993 }
2994 if (end-start >= slen)
2995 return ! memcmp(str+start, sub, slen);
2996 return 0;
2997}
2998
2999
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003000PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003001"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003002\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003003Return True if S starts with the specified prefix, False otherwise.\n\
3004With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003005With optional end, stop comparing S at that position.\n\
3006prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003007
3008static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003009string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003010{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003011 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003012 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003013 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003014 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003015
Guido van Rossumc6821402000-05-08 14:08:05 +00003016 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3017 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003018 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003019 if (PyTuple_Check(subobj)) {
3020 Py_ssize_t i;
3021 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3022 result = _string_tailmatch(self,
3023 PyTuple_GET_ITEM(subobj, i),
3024 start, end, -1);
3025 if (result == -1)
3026 return NULL;
3027 else if (result) {
3028 Py_RETURN_TRUE;
3029 }
3030 }
3031 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003032 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003033 result = _string_tailmatch(self, subobj, start, end, -1);
3034 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003035 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003036 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003037 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003038}
3039
3040
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003041PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003042"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003043\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003044Return True if S ends with the specified suffix, False otherwise.\n\
3045With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003046With optional end, stop comparing S at that position.\n\
3047suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003048
3049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003050string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003051{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003052 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003053 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003054 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003055 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003056
Guido van Rossumc6821402000-05-08 14:08:05 +00003057 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3058 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003060 if (PyTuple_Check(subobj)) {
3061 Py_ssize_t i;
3062 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3063 result = _string_tailmatch(self,
3064 PyTuple_GET_ITEM(subobj, i),
3065 start, end, +1);
3066 if (result == -1)
3067 return NULL;
3068 else if (result) {
3069 Py_RETURN_TRUE;
3070 }
3071 }
3072 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003073 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003074 result = _string_tailmatch(self, subobj, start, end, +1);
3075 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003076 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003077 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003078 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003079}
3080
3081
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003082PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003083"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003084\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003085Encodes S using the codec registered for encoding. encoding defaults\n\
3086to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003087handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003088a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3089'xmlcharrefreplace' as well as any other name registered with\n\
3090codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003091
3092static PyObject *
3093string_encode(PyStringObject *self, PyObject *args)
3094{
3095 char *encoding = NULL;
3096 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003097 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003098
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003099 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3100 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003101 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003102 if (v == NULL)
3103 goto onError;
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003104 if (!PyBytes_Check(v)) {
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003105 PyErr_Format(PyExc_TypeError,
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003106 "[str8] encoder did not return a bytes object "
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003107 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003108 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003109 Py_DECREF(v);
3110 return NULL;
3111 }
3112 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003113
3114 onError:
3115 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003116}
3117
3118
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003119PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003120"S.decode([encoding[,errors]]) -> object\n\
3121\n\
3122Decodes S using the codec registered for encoding. encoding defaults\n\
3123to the default encoding. errors may be given to set a different error\n\
3124handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003125a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3126as well as any other name registerd with codecs.register_error that is\n\
3127able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003128
3129static PyObject *
3130string_decode(PyStringObject *self, PyObject *args)
3131{
3132 char *encoding = NULL;
3133 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003134 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003135
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003136 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3137 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003138 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003139 if (v == NULL)
3140 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003141 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3142 PyErr_Format(PyExc_TypeError,
3143 "decoder did not return a string/unicode object "
3144 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003145 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003146 Py_DECREF(v);
3147 return NULL;
3148 }
3149 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003150
3151 onError:
3152 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003153}
3154
3155
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003156PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003157"S.expandtabs([tabsize]) -> string\n\
3158\n\
3159Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003160If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161
3162static PyObject*
3163string_expandtabs(PyStringObject *self, PyObject *args)
3164{
3165 const char *e, *p;
3166 char *q;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003167 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003168 PyObject *u;
3169 int tabsize = 8;
3170
3171 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3172 return NULL;
3173
Thomas Wouters7e474022000-07-16 12:04:32 +00003174 /* First pass: determine size of output string */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003175 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3177 for (p = PyString_AS_STRING(self); p < e; p++)
3178 if (*p == '\t') {
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003179 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003180 j += tabsize - (j % tabsize);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003181 if (old_j > j) {
3182 PyErr_SetString(PyExc_OverflowError,
3183 "new string is too long");
3184 return NULL;
3185 }
3186 old_j = j;
3187 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003188 }
3189 else {
3190 j++;
3191 if (*p == '\n' || *p == '\r') {
3192 i += j;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003193 old_j = j = 0;
3194 if (i < 0) {
3195 PyErr_SetString(PyExc_OverflowError,
3196 "new string is too long");
3197 return NULL;
3198 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003199 }
3200 }
3201
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003202 if ((i + j) < 0) {
3203 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3204 return NULL;
3205 }
3206
Guido van Rossum4c08d552000-03-10 22:55:18 +00003207 /* Second pass: create output string and fill it */
3208 u = PyString_FromStringAndSize(NULL, i + j);
3209 if (!u)
3210 return NULL;
3211
3212 j = 0;
3213 q = PyString_AS_STRING(u);
3214
3215 for (p = PyString_AS_STRING(self); p < e; p++)
3216 if (*p == '\t') {
3217 if (tabsize > 0) {
3218 i = tabsize - (j % tabsize);
3219 j += i;
3220 while (i--)
3221 *q++ = ' ';
3222 }
3223 }
3224 else {
3225 j++;
3226 *q++ = *p;
3227 if (*p == '\n' || *p == '\r')
3228 j = 0;
3229 }
3230
3231 return u;
3232}
3233
Thomas Wouters477c8d52006-05-27 19:21:47 +00003234Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003235pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003236{
3237 PyObject *u;
3238
3239 if (left < 0)
3240 left = 0;
3241 if (right < 0)
3242 right = 0;
3243
Tim Peters8fa5dd02001-09-12 02:18:30 +00003244 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003245 Py_INCREF(self);
3246 return (PyObject *)self;
3247 }
3248
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003249 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003250 left + PyString_GET_SIZE(self) + right);
3251 if (u) {
3252 if (left)
3253 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003254 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003255 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003256 PyString_GET_SIZE(self));
3257 if (right)
3258 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3259 fill, right);
3260 }
3261
3262 return u;
3263}
3264
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003265PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003266"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003267"\n"
3268"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003269"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003270
3271static PyObject *
3272string_ljust(PyStringObject *self, PyObject *args)
3273{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003274 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003275 char fillchar = ' ';
3276
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003277 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003278 return NULL;
3279
Tim Peters8fa5dd02001-09-12 02:18:30 +00003280 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003281 Py_INCREF(self);
3282 return (PyObject*) self;
3283 }
3284
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003285 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003286}
3287
3288
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003289PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003290"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003291"\n"
3292"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003293"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003294
3295static PyObject *
3296string_rjust(PyStringObject *self, PyObject *args)
3297{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003298 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003299 char fillchar = ' ';
3300
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003301 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003302 return NULL;
3303
Tim Peters8fa5dd02001-09-12 02:18:30 +00003304 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003305 Py_INCREF(self);
3306 return (PyObject*) self;
3307 }
3308
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003309 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003310}
3311
3312
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003313PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003314"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003315"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003316"Return S centered in a string of length width. Padding is\n"
3317"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003318
3319static PyObject *
3320string_center(PyStringObject *self, PyObject *args)
3321{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003322 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003323 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003324 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003325
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003326 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003327 return NULL;
3328
Tim Peters8fa5dd02001-09-12 02:18:30 +00003329 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003330 Py_INCREF(self);
3331 return (PyObject*) self;
3332 }
3333
3334 marg = width - PyString_GET_SIZE(self);
3335 left = marg / 2 + (marg & width & 1);
3336
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003337 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003338}
3339
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003340PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003341"S.zfill(width) -> string\n"
3342"\n"
3343"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003344"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003345
3346static PyObject *
3347string_zfill(PyStringObject *self, PyObject *args)
3348{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003349 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003350 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003351 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003352 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003353
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003354 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003355 return NULL;
3356
3357 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003358 if (PyString_CheckExact(self)) {
3359 Py_INCREF(self);
3360 return (PyObject*) self;
3361 }
3362 else
3363 return PyString_FromStringAndSize(
3364 PyString_AS_STRING(self),
3365 PyString_GET_SIZE(self)
3366 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003367 }
3368
3369 fill = width - PyString_GET_SIZE(self);
3370
3371 s = pad(self, fill, 0, '0');
3372
3373 if (s == NULL)
3374 return NULL;
3375
3376 p = PyString_AS_STRING(s);
3377 if (p[fill] == '+' || p[fill] == '-') {
3378 /* move sign to beginning of string */
3379 p[0] = p[fill];
3380 p[fill] = '0';
3381 }
3382
3383 return (PyObject*) s;
3384}
3385
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003386PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003387"S.isspace() -> bool\n\
3388\n\
3389Return True if all characters in S are whitespace\n\
3390and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003391
3392static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003393string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003394{
Fred Drakeba096332000-07-09 07:04:36 +00003395 register const unsigned char *p
3396 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003397 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003398
Guido van Rossum4c08d552000-03-10 22:55:18 +00003399 /* Shortcut for single character strings */
3400 if (PyString_GET_SIZE(self) == 1 &&
3401 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003402 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003403
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003404 /* Special case for empty strings */
3405 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003406 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003407
Guido van Rossum4c08d552000-03-10 22:55:18 +00003408 e = p + PyString_GET_SIZE(self);
3409 for (; p < e; p++) {
3410 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003411 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003413 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414}
3415
3416
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003417PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003418"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003419\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003420Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003421and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003422
3423static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003424string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003425{
Fred Drakeba096332000-07-09 07:04:36 +00003426 register const unsigned char *p
3427 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003428 register const unsigned char *e;
3429
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003430 /* Shortcut for single character strings */
3431 if (PyString_GET_SIZE(self) == 1 &&
3432 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003433 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003434
3435 /* Special case for empty strings */
3436 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003437 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003438
3439 e = p + PyString_GET_SIZE(self);
3440 for (; p < e; p++) {
3441 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003442 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003443 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003444 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003445}
3446
3447
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003448PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003449"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003450\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003451Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003452and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003453
3454static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003455string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003456{
Fred Drakeba096332000-07-09 07:04:36 +00003457 register const unsigned char *p
3458 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003459 register const unsigned char *e;
3460
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003461 /* Shortcut for single character strings */
3462 if (PyString_GET_SIZE(self) == 1 &&
3463 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003464 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003465
3466 /* Special case for empty strings */
3467 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003468 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003469
3470 e = p + PyString_GET_SIZE(self);
3471 for (; p < e; p++) {
3472 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003473 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003474 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003475 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003476}
3477
3478
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003479PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003480"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003481\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003482Return True if all characters in S are digits\n\
3483and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003484
3485static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003486string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003487{
Fred Drakeba096332000-07-09 07:04:36 +00003488 register const unsigned char *p
3489 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003490 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003491
Guido van Rossum4c08d552000-03-10 22:55:18 +00003492 /* Shortcut for single character strings */
3493 if (PyString_GET_SIZE(self) == 1 &&
3494 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003495 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003496
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003497 /* Special case for empty strings */
3498 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003499 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003500
Guido van Rossum4c08d552000-03-10 22:55:18 +00003501 e = p + PyString_GET_SIZE(self);
3502 for (; p < e; p++) {
3503 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003504 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003506 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003507}
3508
3509
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003510PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003511"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003512\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003513Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003514at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003515
3516static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003517string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518{
Fred Drakeba096332000-07-09 07:04:36 +00003519 register const unsigned char *p
3520 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003521 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003522 int cased;
3523
Guido van Rossum4c08d552000-03-10 22:55:18 +00003524 /* Shortcut for single character strings */
3525 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003526 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003527
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003528 /* Special case for empty strings */
3529 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003530 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003531
Guido van Rossum4c08d552000-03-10 22:55:18 +00003532 e = p + PyString_GET_SIZE(self);
3533 cased = 0;
3534 for (; p < e; p++) {
3535 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003536 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003537 else if (!cased && islower(*p))
3538 cased = 1;
3539 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003540 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003541}
3542
3543
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003544PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003545"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003546\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003547Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003548at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003549
3550static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003551string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003552{
Fred Drakeba096332000-07-09 07:04:36 +00003553 register const unsigned char *p
3554 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003555 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003556 int cased;
3557
Guido van Rossum4c08d552000-03-10 22:55:18 +00003558 /* Shortcut for single character strings */
3559 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003560 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003562 /* Special case for empty strings */
3563 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003564 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003565
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566 e = p + PyString_GET_SIZE(self);
3567 cased = 0;
3568 for (; p < e; p++) {
3569 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003570 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003571 else if (!cased && isupper(*p))
3572 cased = 1;
3573 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003574 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003575}
3576
3577
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003578PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003579"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003580\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003581Return True if S is a titlecased string and there is at least one\n\
3582character in S, i.e. uppercase characters may only follow uncased\n\
3583characters and lowercase characters only cased ones. Return False\n\
3584otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003585
3586static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003587string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003588{
Fred Drakeba096332000-07-09 07:04:36 +00003589 register const unsigned char *p
3590 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003591 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003592 int cased, previous_is_cased;
3593
Guido van Rossum4c08d552000-03-10 22:55:18 +00003594 /* Shortcut for single character strings */
3595 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003596 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003597
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003598 /* Special case for empty strings */
3599 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003600 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003601
Guido van Rossum4c08d552000-03-10 22:55:18 +00003602 e = p + PyString_GET_SIZE(self);
3603 cased = 0;
3604 previous_is_cased = 0;
3605 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003606 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003607
3608 if (isupper(ch)) {
3609 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003610 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003611 previous_is_cased = 1;
3612 cased = 1;
3613 }
3614 else if (islower(ch)) {
3615 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003616 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617 previous_is_cased = 1;
3618 cased = 1;
3619 }
3620 else
3621 previous_is_cased = 0;
3622 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003623 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003624}
3625
3626
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003627PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003628"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629\n\
3630Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003631Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003632is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003633
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634static PyObject*
3635string_splitlines(PyStringObject *self, PyObject *args)
3636{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003637 register Py_ssize_t i;
3638 register Py_ssize_t j;
3639 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003640 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641 PyObject *list;
3642 PyObject *str;
3643 char *data;
3644
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003645 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646 return NULL;
3647
3648 data = PyString_AS_STRING(self);
3649 len = PyString_GET_SIZE(self);
3650
Thomas Wouters477c8d52006-05-27 19:21:47 +00003651 /* This does not use the preallocated list because splitlines is
3652 usually run with hundreds of newlines. The overhead of
3653 switching between PyList_SET_ITEM and append causes about a
3654 2-3% slowdown for that common case. A smarter implementation
3655 could move the if check out, so the SET_ITEMs are done first
3656 and the appends only done when the prealloc buffer is full.
3657 That's too much work for little gain.*/
3658
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659 list = PyList_New(0);
3660 if (!list)
3661 goto onError;
3662
3663 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003664 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003665
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666 /* Find a line and append it */
3667 while (i < len && data[i] != '\n' && data[i] != '\r')
3668 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669
3670 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003671 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672 if (i < len) {
3673 if (data[i] == '\r' && i + 1 < len &&
3674 data[i+1] == '\n')
3675 i += 2;
3676 else
3677 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003678 if (keepends)
3679 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003681 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682 j = i;
3683 }
3684 if (j < len) {
3685 SPLIT_APPEND(data, j, len);
3686 }
3687
3688 return list;
3689
3690 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003691 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692 return NULL;
3693}
3694
3695#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003696#undef SPLIT_ADD
3697#undef MAX_PREALLOC
3698#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003700static PyObject *
3701string_getnewargs(PyStringObject *v)
3702{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003703 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003704}
3705
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003706
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003707static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003708string_methods[] = {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003709 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3710 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003711 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003712 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3713 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003714 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3715 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3716 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3717 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3718 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3719 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3720 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003721 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3722 capitalize__doc__},
3723 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3724 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3725 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003726 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003727 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3728 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3729 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3730 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3731 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3732 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3733 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003734 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3735 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003736 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3737 startswith__doc__},
3738 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3739 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3740 swapcase__doc__},
3741 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3742 translate__doc__},
3743 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3744 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3745 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3746 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3747 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3748 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3749 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3750 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3751 expandtabs__doc__},
3752 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3753 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003754 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003755 {NULL, NULL} /* sentinel */
3756};
3757
Jeremy Hylton938ace62002-07-17 16:30:39 +00003758static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003759str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3760
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003761static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003762string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003763{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003764 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003765 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003766
Guido van Rossumae960af2001-08-30 03:11:59 +00003767 if (type != &PyString_Type)
3768 return str_subtype_new(type, args, kwds);
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003769 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
Tim Peters6d6c1a32001-08-02 04:15:00 +00003770 return NULL;
3771 if (x == NULL)
3772 return PyString_FromString("");
3773 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003774}
3775
Guido van Rossumae960af2001-08-30 03:11:59 +00003776static PyObject *
3777str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3778{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003779 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003780 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003781
3782 assert(PyType_IsSubtype(type, &PyString_Type));
3783 tmp = string_new(&PyString_Type, args, kwds);
3784 if (tmp == NULL)
3785 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003786 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003787 n = PyString_GET_SIZE(tmp);
3788 pnew = type->tp_alloc(type, n);
3789 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003790 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003791 ((PyStringObject *)pnew)->ob_shash =
3792 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003793 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003794 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003795 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003796 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003797}
3798
Guido van Rossumcacfc072002-05-24 19:01:59 +00003799static PyObject *
3800basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3801{
3802 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003803 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003804 return NULL;
3805}
3806
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003807static PyObject *
3808string_mod(PyObject *v, PyObject *w)
3809{
3810 if (!PyString_Check(v)) {
3811 Py_INCREF(Py_NotImplemented);
3812 return Py_NotImplemented;
3813 }
3814 return PyString_Format(v, w);
3815}
3816
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003817PyDoc_STRVAR(basestring_doc,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003818"Type basestring cannot be instantiated; it is the base for str8 and str.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003819
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003820static PyNumberMethods string_as_number = {
3821 0, /*nb_add*/
3822 0, /*nb_subtract*/
3823 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003824 string_mod, /*nb_remainder*/
3825};
3826
3827
Guido van Rossumcacfc072002-05-24 19:01:59 +00003828PyTypeObject PyBaseString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003829 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003830 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003831 0,
3832 0,
3833 0, /* tp_dealloc */
3834 0, /* tp_print */
3835 0, /* tp_getattr */
3836 0, /* tp_setattr */
3837 0, /* tp_compare */
3838 0, /* tp_repr */
3839 0, /* tp_as_number */
3840 0, /* tp_as_sequence */
3841 0, /* tp_as_mapping */
3842 0, /* tp_hash */
3843 0, /* tp_call */
3844 0, /* tp_str */
3845 0, /* tp_getattro */
3846 0, /* tp_setattro */
3847 0, /* tp_as_buffer */
3848 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3849 basestring_doc, /* tp_doc */
3850 0, /* tp_traverse */
3851 0, /* tp_clear */
3852 0, /* tp_richcompare */
3853 0, /* tp_weaklistoffset */
3854 0, /* tp_iter */
3855 0, /* tp_iternext */
3856 0, /* tp_methods */
3857 0, /* tp_members */
3858 0, /* tp_getset */
3859 &PyBaseObject_Type, /* tp_base */
3860 0, /* tp_dict */
3861 0, /* tp_descr_get */
3862 0, /* tp_descr_set */
3863 0, /* tp_dictoffset */
3864 0, /* tp_init */
3865 0, /* tp_alloc */
3866 basestring_new, /* tp_new */
3867 0, /* tp_free */
3868};
3869
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003870PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003871"str(object) -> string\n\
3872\n\
3873Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003874If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003875
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003876static PyObject *str_iter(PyObject *seq);
3877
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003878PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003879 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Walter Dörwald5d7a7002007-05-03 20:49:27 +00003880 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003881 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003882 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003883 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003884 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003885 0, /* tp_getattr */
3886 0, /* tp_setattr */
3887 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003888 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003889 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003890 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003891 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003892 (hashfunc)string_hash, /* tp_hash */
3893 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003894 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003895 PyObject_GenericGetAttr, /* tp_getattro */
3896 0, /* tp_setattro */
3897 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003898 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3899 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003900 string_doc, /* tp_doc */
3901 0, /* tp_traverse */
3902 0, /* tp_clear */
3903 (richcmpfunc)string_richcompare, /* tp_richcompare */
3904 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003905 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003906 0, /* tp_iternext */
3907 string_methods, /* tp_methods */
3908 0, /* tp_members */
3909 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003910 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003911 0, /* tp_dict */
3912 0, /* tp_descr_get */
3913 0, /* tp_descr_set */
3914 0, /* tp_dictoffset */
3915 0, /* tp_init */
3916 0, /* tp_alloc */
3917 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003918 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003919};
3920
3921void
Fred Drakeba096332000-07-09 07:04:36 +00003922PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003923{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003924 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003925 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003926 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003927 if (w == NULL || !PyString_Check(*pv)) {
3928 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003929 *pv = NULL;
3930 return;
3931 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003932 v = string_concat((PyStringObject *) *pv, w);
3933 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003934 *pv = v;
3935}
3936
Guido van Rossum013142a1994-08-30 08:19:36 +00003937void
Fred Drakeba096332000-07-09 07:04:36 +00003938PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003939{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003940 PyString_Concat(pv, w);
3941 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003942}
3943
3944
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003945/* The following function breaks the notion that strings are immutable:
3946 it changes the size of a string. We get away with this only if there
3947 is only one module referencing the object. You can also think of it
3948 as creating a new string object and destroying the old one, only
3949 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003950 already be known to some other part of the code...
3951 Note that if there's not enough memory to resize the string, the original
3952 string object at *pv is deallocated, *pv is set to NULL, an "out of
3953 memory" exception is set, and -1 is returned. Else (on success) 0 is
3954 returned, and the value in *pv may or may not be the same as on input.
3955 As always, an extra byte is allocated for a trailing \0 byte (newsize
3956 does *not* include that), and a trailing \0 byte is stored.
3957*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003958
3959int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003960_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003961{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003962 register PyObject *v;
3963 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003964 v = *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003965 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00003966 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003967 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003968 Py_DECREF(v);
3969 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003970 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003971 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003972 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003973 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003974 _Py_ForgetReference(v);
3975 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003976 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003977 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003978 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003979 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003980 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003981 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003982 _Py_NewReference(*pv);
3983 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003984 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003985 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003986 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003987 return 0;
3988}
Guido van Rossume5372401993-03-16 12:15:04 +00003989
3990/* Helpers for formatstring */
3991
Thomas Wouters477c8d52006-05-27 19:21:47 +00003992Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00003993getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003994{
Thomas Wouters977485d2006-02-16 15:59:12 +00003995 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003996 if (argidx < arglen) {
3997 (*p_argidx)++;
3998 if (arglen < 0)
3999 return args;
4000 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004001 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004002 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004003 PyErr_SetString(PyExc_TypeError,
4004 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004005 return NULL;
4006}
4007
/* Format flag bits, one per flag character:
 *
 *      F_LJUST     '-'
 *      F_SIGN      '+'
 *      F_BLANK     ' '
 *      F_ALT       '#'
 *      F_ZERO      '0'
 *
 * Each flag occupies its own bit, so they can be combined with | and
 * tested with &.
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4020
Thomas Wouters477c8d52006-05-27 19:21:47 +00004021Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004022formatfloat(char *buf, size_t buflen, int flags,
4023 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004024{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004025 /* fmt = '%#.' + `prec` + `type`
4026 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004027 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004028 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004029 x = PyFloat_AsDouble(v);
4030 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004031 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004032 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004033 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004034 }
Guido van Rossume5372401993-03-16 12:15:04 +00004035 if (prec < 0)
4036 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004037 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4038 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004039 /* Worst case length calc to ensure no buffer overrun:
4040
4041 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004042 fmt = %#.<prec>g
4043 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004044 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004045 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004046
4047 'f' formats:
4048 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4049 len = 1 + 50 + 1 + prec = 52 + prec
4050
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004051 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004052 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004053
4054 */
Guido van Rossumb5a755e2007-07-18 18:15:48 +00004055 if (((type == 'g' || type == 'G') &&
4056 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004057 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004058 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004059 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004060 return -1;
4061 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004062 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4063 (flags&F_ALT) ? "#" : "",
4064 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004065 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004066 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004067}
4068
Tim Peters38fd5b62000-09-21 05:43:11 +00004069/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4070 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4071 * Python's regular ints.
4072 * Return value: a new PyString*, or NULL if error.
4073 * . *pbuf is set to point into it,
4074 * *plen set to the # of chars following that.
4075 * Caller must decref it when done using pbuf.
4076 * The string starting at *pbuf is of the form
4077 * "-"? ("0x" | "0X")? digit+
4078 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004079 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004080 * There will be at least prec digits, zero-filled on the left if
4081 * necessary to get that many.
4082 * val object to be converted
4083 * flags bitmask of format flags; only F_ALT is looked at
4084 * prec minimum number of digits; 0-fill on left if needed
4085 * type a character in [duoxX]; u acts the same as d
4086 *
4087 * CAUTION: o, x and X conversions on regular ints can never
4088 * produce a '-' sign, but can for Python's unbounded ints.
4089 */
4090PyObject*
4091_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4092 char **pbuf, int *plen)
4093{
4094 PyObject *result = NULL;
4095 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004096 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004097 int sign; /* 1 if '-', else 0 */
4098 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004099 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004100 int numdigits; /* len == numnondigits + numdigits */
4101 int numnondigits = 0;
4102
Guido van Rossumddefaf32007-01-14 03:31:43 +00004103 /* Avoid exceeding SSIZE_T_MAX */
4104 if (prec > PY_SSIZE_T_MAX-3) {
4105 PyErr_SetString(PyExc_OverflowError,
4106 "precision too large");
4107 return NULL;
4108 }
4109
Tim Peters38fd5b62000-09-21 05:43:11 +00004110 switch (type) {
4111 case 'd':
4112 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00004113 /* Special-case boolean: we want 0/1 */
4114 if (PyBool_Check(val))
4115 result = PyNumber_ToBase(val, 10);
4116 else
4117 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004118 break;
4119 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004120 numnondigits = 2;
4121 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00004122 break;
4123 case 'x':
4124 case 'X':
4125 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004126 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00004127 break;
4128 default:
4129 assert(!"'type' not in [duoxX]");
4130 }
4131 if (!result)
4132 return NULL;
4133
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004134 buf = PyString_AsString(result);
4135 if (!buf) {
4136 Py_DECREF(result);
4137 return NULL;
4138 }
4139
Tim Peters38fd5b62000-09-21 05:43:11 +00004140 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004141 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004142 PyErr_BadInternalCall();
4143 return NULL;
4144 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004145 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004146 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004147 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4148 return NULL;
4149 }
4150 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004151 if (buf[len-1] == 'L') {
4152 --len;
4153 buf[len] = '\0';
4154 }
4155 sign = buf[0] == '-';
4156 numnondigits += sign;
4157 numdigits = len - numnondigits;
4158 assert(numdigits > 0);
4159
Tim Petersfff53252001-04-12 18:38:48 +00004160 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004161 if (((flags & F_ALT) == 0 &&
4162 (type == 'o' || type == 'x' || type == 'X'))) {
4163 assert(buf[sign] == '0');
4164 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
4165 buf[sign+1] == 'o');
4166 numnondigits -= 2;
4167 buf += 2;
4168 len -= 2;
4169 if (sign)
4170 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00004171 assert(len == numnondigits + numdigits);
4172 assert(numdigits > 0);
4173 }
4174
4175 /* Fill with leading zeroes to meet minimum width. */
4176 if (prec > numdigits) {
4177 PyObject *r1 = PyString_FromStringAndSize(NULL,
4178 numnondigits + prec);
4179 char *b1;
4180 if (!r1) {
4181 Py_DECREF(result);
4182 return NULL;
4183 }
4184 b1 = PyString_AS_STRING(r1);
4185 for (i = 0; i < numnondigits; ++i)
4186 *b1++ = *buf++;
4187 for (i = 0; i < prec - numdigits; i++)
4188 *b1++ = '0';
4189 for (i = 0; i < numdigits; i++)
4190 *b1++ = *buf++;
4191 *b1 = '\0';
4192 Py_DECREF(result);
4193 result = r1;
4194 buf = PyString_AS_STRING(result);
4195 len = numnondigits + prec;
4196 }
4197
4198 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004199 if (type == 'X') {
4200 /* Need to convert all lower case letters to upper case.
4201 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004202 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004203 if (buf[i] >= 'a' && buf[i] <= 'x')
4204 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004205 }
4206 *pbuf = buf;
4207 *plen = len;
4208 return result;
4209}
4210
Thomas Wouters477c8d52006-05-27 19:21:47 +00004211Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004212formatint(char *buf, size_t buflen, int flags,
4213 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004214{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004215 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004216 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4217 + 1 + 1 = 24 */
4218 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004219 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004220 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004221
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004222 x = PyInt_AsLong(v);
4223 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004224 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004225 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004226 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004227 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004228 if (x < 0 && type == 'u') {
4229 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004230 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004231 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4232 sign = "-";
4233 else
4234 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004235 if (prec < 0)
4236 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004237
4238 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004239 (type == 'x' || type == 'X' || type == 'o')) {
4240 /* When converting under %#o, %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004241 * of issues that cause pain:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004242 * - for %#o, we want a different base marker than C
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004243 * - when 0 is being converted, the C standard leaves off
4244 * the '0x' or '0X', which is inconsistent with other
4245 * %#x/%#X conversions and inconsistent with Python's
4246 * hex() function
4247 * - there are platforms that violate the standard and
4248 * convert 0 with the '0x' or '0X'
4249 * (Metrowerks, Compaq Tru64)
4250 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004251 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004252 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004253 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004254 * We can achieve the desired consistency by inserting our
4255 * own '0x' or '0X' prefix, and substituting %x/%X in place
4256 * of %#x/%#X.
4257 *
4258 * Note that this is the same approach as used in
4259 * formatint() in unicodeobject.c
4260 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004261 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4262 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004263 }
4264 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004265 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4266 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004267 prec, type);
4268 }
4269
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004270 /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004271 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004272 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004273 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004274 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004275 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004276 return -1;
4277 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004278 if (sign[0])
4279 PyOS_snprintf(buf, buflen, fmt, -x);
4280 else
4281 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004282 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004283}
4284
Thomas Wouters477c8d52006-05-27 19:21:47 +00004285Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004286formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004287{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004288 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004289 if (PyString_Check(v)) {
4290 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004291 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004292 }
4293 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004294 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004295 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004296 }
4297 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004298 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004299}
4300
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004301/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4302
4303 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4304 chars are formatted. XXX This is a magic number. Each formatting
4305 routine does bounds checking to ensure no overflow, but a better
4306 solution may be to malloc a buffer of appropriate size for each
4307 format. For now, the current solution is sufficient.
4308*/
4309#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004310
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004311PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004312PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004313{
4314 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004315 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004316 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004317 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004318 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004319 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004320 PyObject *dict = NULL;
4321 if (format == NULL || !PyString_Check(format) || args == NULL) {
4322 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004323 return NULL;
4324 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004325 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004326 fmt = PyString_AS_STRING(format);
4327 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004328 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004329 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004330 if (result == NULL)
4331 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004332 res = PyString_AsString(result);
4333 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004334 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004335 argidx = 0;
4336 }
4337 else {
4338 arglen = -1;
4339 argidx = -2;
4340 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004341 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004342 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004343 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004344 while (--fmtcnt >= 0) {
4345 if (*fmt != '%') {
4346 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004347 rescnt = fmtcnt + 100;
4348 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004349 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004350 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004351 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004352 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004353 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004354 }
4355 *res++ = *fmt++;
4356 }
4357 else {
4358 /* Got a format specifier */
4359 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004360 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004361 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004362 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004363 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004364 PyObject *v = NULL;
4365 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004366 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004367 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004368 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004369 char formatbuf[FORMATBUFLEN];
4370 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00004371 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004372 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004373
Guido van Rossumda9c2711996-12-05 21:58:58 +00004374 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004375 if (*fmt == '(') {
4376 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004377 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004378 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004379 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004380
4381 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004382 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004383 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004384 goto error;
4385 }
4386 ++fmt;
4387 --fmtcnt;
4388 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004389 /* Skip over balanced parentheses */
4390 while (pcount > 0 && --fmtcnt >= 0) {
4391 if (*fmt == ')')
4392 --pcount;
4393 else if (*fmt == '(')
4394 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004395 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004396 }
4397 keylen = fmt - keystart - 1;
4398 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004399 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004400 "incomplete format key");
4401 goto error;
4402 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004403 key = PyString_FromStringAndSize(keystart,
4404 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004405 if (key == NULL)
4406 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004407 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004408 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004409 args_owned = 0;
4410 }
4411 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004412 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004413 if (args == NULL) {
4414 goto error;
4415 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004416 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004417 arglen = -1;
4418 argidx = -2;
4419 }
Guido van Rossume5372401993-03-16 12:15:04 +00004420 while (--fmtcnt >= 0) {
4421 switch (c = *fmt++) {
4422 case '-': flags |= F_LJUST; continue;
4423 case '+': flags |= F_SIGN; continue;
4424 case ' ': flags |= F_BLANK; continue;
4425 case '#': flags |= F_ALT; continue;
4426 case '0': flags |= F_ZERO; continue;
4427 }
4428 break;
4429 }
4430 if (c == '*') {
4431 v = getnextarg(args, arglen, &argidx);
4432 if (v == NULL)
4433 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004434 if (!PyInt_Check(v)) {
4435 PyErr_SetString(PyExc_TypeError,
4436 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004437 goto error;
4438 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004439 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004440 if (width == -1 && PyErr_Occurred())
4441 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004442 if (width < 0) {
4443 flags |= F_LJUST;
4444 width = -width;
4445 }
Guido van Rossume5372401993-03-16 12:15:04 +00004446 if (--fmtcnt >= 0)
4447 c = *fmt++;
4448 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004449 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004450 width = c - '0';
4451 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004452 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004453 if (!isdigit(c))
4454 break;
4455 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004456 PyErr_SetString(
4457 PyExc_ValueError,
4458 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004459 goto error;
4460 }
4461 width = width*10 + (c - '0');
4462 }
4463 }
4464 if (c == '.') {
4465 prec = 0;
4466 if (--fmtcnt >= 0)
4467 c = *fmt++;
4468 if (c == '*') {
4469 v = getnextarg(args, arglen, &argidx);
4470 if (v == NULL)
4471 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004472 if (!PyInt_Check(v)) {
4473 PyErr_SetString(
4474 PyExc_TypeError,
4475 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004476 goto error;
4477 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004478 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004479 if (prec == -1 && PyErr_Occurred())
4480 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004481 if (prec < 0)
4482 prec = 0;
4483 if (--fmtcnt >= 0)
4484 c = *fmt++;
4485 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004486 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004487 prec = c - '0';
4488 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004489 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004490 if (!isdigit(c))
4491 break;
4492 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004493 PyErr_SetString(
4494 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004495 "prec too big");
4496 goto error;
4497 }
4498 prec = prec*10 + (c - '0');
4499 }
4500 }
4501 } /* prec */
4502 if (fmtcnt >= 0) {
4503 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004504 if (--fmtcnt >= 0)
4505 c = *fmt++;
4506 }
4507 }
4508 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004509 PyErr_SetString(PyExc_ValueError,
4510 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004511 goto error;
4512 }
4513 if (c != '%') {
4514 v = getnextarg(args, arglen, &argidx);
4515 if (v == NULL)
4516 goto error;
4517 }
4518 sign = 0;
4519 fill = ' ';
4520 switch (c) {
4521 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004522 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004523 len = 1;
4524 break;
4525 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00004526 if (PyUnicode_Check(v)) {
4527 fmt = fmt_start;
4528 argidx = argidx_start;
4529 goto unicode;
4530 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00004531 temp = _PyObject_Str(v);
4532 if (temp != NULL && PyUnicode_Check(temp)) {
4533 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004534 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004535 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004536 goto unicode;
4537 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004538 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004539 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004540 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00004541 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004542 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004543 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004544 if (!PyString_Check(temp)) {
4545 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00004546 "%s argument has non-string str()/repr()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004547 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004548 goto error;
4549 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004550 pbuf = PyString_AS_STRING(temp);
4551 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004552 if (prec >= 0 && len > prec)
4553 len = prec;
4554 break;
4555 case 'i':
4556 case 'd':
4557 case 'u':
4558 case 'o':
4559 case 'x':
4560 case 'X':
4561 if (c == 'i')
4562 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004563 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004564 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004565 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004566 prec, c, &pbuf, &ilen);
4567 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004568 if (!temp)
4569 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004570 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004571 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004572 else {
4573 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004574 len = formatint(pbuf,
4575 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004576 flags, prec, c, v);
4577 if (len < 0)
4578 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004579 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004580 }
4581 if (flags & F_ZERO)
4582 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004583 break;
4584 case 'e':
4585 case 'E':
4586 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004587 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004588 case 'g':
4589 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004590 if (c == 'F')
4591 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004592 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004593 len = formatfloat(pbuf, sizeof(formatbuf),
4594 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004595 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004596 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004597 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004598 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004599 fill = '0';
4600 break;
4601 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004602 if (PyUnicode_Check(v)) {
4603 fmt = fmt_start;
4604 argidx = argidx_start;
4605 goto unicode;
4606 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004607 pbuf = formatbuf;
4608 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004609 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004610 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004611 break;
4612 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004613 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004614 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004615 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004616 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004617 (Py_ssize_t)(fmt - 1 -
4618 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004619 goto error;
4620 }
4621 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004622 if (*pbuf == '-' || *pbuf == '+') {
4623 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004624 len--;
4625 }
4626 else if (flags & F_SIGN)
4627 sign = '+';
4628 else if (flags & F_BLANK)
4629 sign = ' ';
4630 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004631 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004632 }
4633 if (width < len)
4634 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004635 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004636 reslen -= rescnt;
4637 rescnt = width + fmtcnt + 100;
4638 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004639 if (reslen < 0) {
4640 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004641 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004642 return PyErr_NoMemory();
4643 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004644 if (_PyString_Resize(&result, reslen) < 0) {
4645 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004646 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004647 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004648 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004649 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004650 }
4651 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004652 if (fill != ' ')
4653 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004654 rescnt--;
4655 if (width > len)
4656 width--;
4657 }
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004658 if ((flags & F_ALT) &&
4659 (c == 'x' || c == 'X' || c == 'o')) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004660 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004661 assert(pbuf[1] == c);
4662 if (fill != ' ') {
4663 *res++ = *pbuf++;
4664 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004665 }
Tim Petersfff53252001-04-12 18:38:48 +00004666 rescnt -= 2;
4667 width -= 2;
4668 if (width < 0)
4669 width = 0;
4670 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004671 }
4672 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004673 do {
4674 --rescnt;
4675 *res++ = fill;
4676 } while (--width > len);
4677 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004678 if (fill == ' ') {
4679 if (sign)
4680 *res++ = sign;
4681 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004682 (c == 'x' || c == 'X' || c == 'o')) {
Tim Petersfff53252001-04-12 18:38:48 +00004683 assert(pbuf[0] == '0');
4684 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004685 *res++ = *pbuf++;
4686 *res++ = *pbuf++;
4687 }
4688 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004689 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004690 res += len;
4691 rescnt -= len;
4692 while (--width >= len) {
4693 --rescnt;
4694 *res++ = ' ';
4695 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004696 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004697 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004698 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004699 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004700 goto error;
4701 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004702 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004703 } /* '%' */
4704 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004705 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004706 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004707 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004708 goto error;
4709 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004710 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004711 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004712 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004713 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004714 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004715
4716 unicode:
4717 if (args_owned) {
4718 Py_DECREF(args);
4719 args_owned = 0;
4720 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004721 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004722 if (PyTuple_Check(orig_args) && argidx > 0) {
4723 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004724 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004725 v = PyTuple_New(n);
4726 if (v == NULL)
4727 goto error;
4728 while (--n >= 0) {
4729 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4730 Py_INCREF(w);
4731 PyTuple_SET_ITEM(v, n, w);
4732 }
4733 args = v;
4734 } else {
4735 Py_INCREF(orig_args);
4736 args = orig_args;
4737 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004738 args_owned = 1;
4739 /* Take what we have of the result and let the Unicode formatting
4740 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004741 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004742 if (_PyString_Resize(&result, rescnt))
4743 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004744 fmtcnt = PyString_GET_SIZE(format) - \
4745 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004746 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4747 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004748 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004749 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004750 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004751 if (v == NULL)
4752 goto error;
4753 /* Paste what we have (result) to what the Unicode formatting
4754 function returned (v) and return the result (or error) */
4755 w = PyUnicode_Concat(result, v);
4756 Py_DECREF(result);
4757 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004758 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004759 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004760
Guido van Rossume5372401993-03-16 12:15:04 +00004761 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004762 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004763 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004764 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004765 }
Guido van Rossume5372401993-03-16 12:15:04 +00004766 return NULL;
4767}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004768
Guido van Rossum2a61e741997-01-18 07:55:05 +00004769void
Fred Drakeba096332000-07-09 07:04:36 +00004770PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004771{
4772 register PyStringObject *s = (PyStringObject *)(*p);
4773 PyObject *t;
4774 if (s == NULL || !PyString_Check(s))
4775 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004776 /* If it's a string subclass, we don't really know what putting
4777 it in the interned dict might do. */
4778 if (!PyString_CheckExact(s))
4779 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004780 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004781 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004782 if (interned == NULL) {
4783 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004784 if (interned == NULL) {
4785 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004786 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004787 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004788 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004789 t = PyDict_GetItem(interned, (PyObject *)s);
4790 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004791 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004792 Py_DECREF(*p);
4793 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004794 return;
4795 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004796
Armin Rigo79f7ad22004-08-07 19:27:39 +00004797 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004798 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004799 return;
4800 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004801 /* The two references in interned are not counted by refcnt.
4802 The string deallocator will take care of this */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004803 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004804 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004805}
4806
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004807void
4808PyString_InternImmortal(PyObject **p)
4809{
4810 PyString_InternInPlace(p);
4811 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4812 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4813 Py_INCREF(*p);
4814 }
4815}
4816
Guido van Rossum2a61e741997-01-18 07:55:05 +00004817
4818PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004819PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004820{
4821 PyObject *s = PyString_FromString(cp);
4822 if (s == NULL)
4823 return NULL;
4824 PyString_InternInPlace(&s);
4825 return s;
4826}
4827
Guido van Rossum8cf04761997-08-02 02:57:45 +00004828void
Fred Drakeba096332000-07-09 07:04:36 +00004829PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004830{
4831 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004832 for (i = 0; i < UCHAR_MAX + 1; i++) {
4833 Py_XDECREF(characters[i]);
4834 characters[i] = NULL;
4835 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004836 Py_XDECREF(nullstring);
4837 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004838}
Barry Warsawa903ad982001-02-23 16:40:48 +00004839
Barry Warsawa903ad982001-02-23 16:40:48 +00004840void _Py_ReleaseInternedStrings(void)
4841{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004842 PyObject *keys;
4843 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004844 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004845 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004846
4847 if (interned == NULL || !PyDict_Check(interned))
4848 return;
4849 keys = PyDict_Keys(interned);
4850 if (keys == NULL || !PyList_Check(keys)) {
4851 PyErr_Clear();
4852 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004853 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004854
4855 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4856 detector, interned strings are not forcibly deallocated; rather, we
4857 give them their stolen references back, and then clear and DECREF
4858 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004859
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004860 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004861 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4862 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004863 for (i = 0; i < n; i++) {
4864 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4865 switch (s->ob_sstate) {
4866 case SSTATE_NOT_INTERNED:
4867 /* XXX Shouldn't happen */
4868 break;
4869 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004870 Py_Refcnt(s) += 1;
4871 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004872 break;
4873 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004874 Py_Refcnt(s) += 2;
4875 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004876 break;
4877 default:
4878 Py_FatalError("Inconsistent interned string state.");
4879 }
4880 s->ob_sstate = SSTATE_NOT_INTERNED;
4881 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004882 fprintf(stderr, "total size of all interned strings: "
4883 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4884 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004885 Py_DECREF(keys);
4886 PyDict_Clear(interned);
4887 Py_DECREF(interned);
4888 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004889}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004890
4891
4892/*********************** Str Iterator ****************************/
4893
4894typedef struct {
4895 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00004896 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004897 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4898} striterobject;
4899
4900static void
4901striter_dealloc(striterobject *it)
4902{
4903 _PyObject_GC_UNTRACK(it);
4904 Py_XDECREF(it->it_seq);
4905 PyObject_GC_Del(it);
4906}
4907
4908static int
4909striter_traverse(striterobject *it, visitproc visit, void *arg)
4910{
4911 Py_VISIT(it->it_seq);
4912 return 0;
4913}
4914
4915static PyObject *
4916striter_next(striterobject *it)
4917{
4918 PyStringObject *seq;
4919 PyObject *item;
4920
4921 assert(it != NULL);
4922 seq = it->it_seq;
4923 if (seq == NULL)
4924 return NULL;
4925 assert(PyString_Check(seq));
4926
4927 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004928 item = PyString_FromStringAndSize(
4929 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004930 if (item != NULL)
4931 ++it->it_index;
4932 return item;
4933 }
4934
4935 Py_DECREF(seq);
4936 it->it_seq = NULL;
4937 return NULL;
4938}
4939
4940static PyObject *
4941striter_len(striterobject *it)
4942{
4943 Py_ssize_t len = 0;
4944 if (it->it_seq)
4945 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
4946 return PyInt_FromSsize_t(len);
4947}
4948
Guido van Rossum49d6b072006-08-17 21:11:47 +00004949PyDoc_STRVAR(length_hint_doc,
4950 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004951
4952static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004953 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
4954 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004955 {NULL, NULL} /* sentinel */
4956};
4957
4958PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004959 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum49d6b072006-08-17 21:11:47 +00004960 "striterator", /* tp_name */
4961 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004962 0, /* tp_itemsize */
4963 /* methods */
4964 (destructor)striter_dealloc, /* tp_dealloc */
4965 0, /* tp_print */
4966 0, /* tp_getattr */
4967 0, /* tp_setattr */
4968 0, /* tp_compare */
4969 0, /* tp_repr */
4970 0, /* tp_as_number */
4971 0, /* tp_as_sequence */
4972 0, /* tp_as_mapping */
4973 0, /* tp_hash */
4974 0, /* tp_call */
4975 0, /* tp_str */
4976 PyObject_GenericGetAttr, /* tp_getattro */
4977 0, /* tp_setattro */
4978 0, /* tp_as_buffer */
4979 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4980 0, /* tp_doc */
4981 (traverseproc)striter_traverse, /* tp_traverse */
4982 0, /* tp_clear */
4983 0, /* tp_richcompare */
4984 0, /* tp_weaklistoffset */
4985 PyObject_SelfIter, /* tp_iter */
4986 (iternextfunc)striter_next, /* tp_iternext */
4987 striter_methods, /* tp_methods */
4988 0,
4989};
4990
4991static PyObject *
4992str_iter(PyObject *seq)
4993{
4994 striterobject *it;
4995
4996 if (!PyString_Check(seq)) {
4997 PyErr_BadInternalCall();
4998 return NULL;
4999 }
5000 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5001 if (it == NULL)
5002 return NULL;
5003 it->it_index = 0;
5004 Py_INCREF(seq);
5005 it->it_seq = (PyStringObject *)seq;
5006 _PyObject_GC_TRACK(it);
5007 return (PyObject *)it;
5008}