blob: 248070f532d12ab5671d2cb83ed82b339a09b036 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time a request is satisfied from the
   shared empty string (null_strings) or from a shared one-character
   string (one_strings) instead of allocating a new object. */
int null_strings, one_strings;
#endif

/* Cache of one-character strings, indexed by the character value masked
   with UCHAR_MAX.  A slot stays NULL until that string is first created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
417 v->ob_type->tp_name);
418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
490 v->ob_type->tp_name);
491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
510 op->ob_refcnt = 3;
511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000522 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000609 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 && isxdigit(Py_CHARMASK(s[1]))) {
611 unsigned int x = 0;
612 c = Py_CHARMASK(*s);
613 s++;
614 if (isdigit(c))
615 x = c - '0';
616 else if (islower(c))
617 x = 10 + c - 'a';
618 else
619 x = 10 + c - 'A';
620 x = x << 4;
621 c = Py_CHARMASK(*s);
622 s++;
623 if (isdigit(c))
624 x += c - '0';
625 else if (islower(c))
626 x += 10 + c - 'a';
627 else
628 x += 10 + c - 'A';
629 *p++ = x;
630 break;
631 }
632 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000633 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000635 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000636 }
637 if (strcmp(errors, "replace") == 0) {
638 *p++ = '?';
639 } else if (strcmp(errors, "ignore") == 0)
640 /* do nothing */;
641 else {
642 PyErr_Format(PyExc_ValueError,
643 "decoding error; "
644 "unknown error handling code: %.400s",
645 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000646 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 default:
649 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000650 s--;
651 goto non_esc; /* an arbitry number of unescaped
652 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 }
654 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000655 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 return v;
658 failed:
659 Py_DECREF(v);
660 return NULL;
661}
662
Thomas Wouters477c8d52006-05-27 19:21:47 +0000663/* -------------------------------------------------------------------- */
664/* object api */
665
Martin v. Löwis18e16552006-02-15 17:27:45 +0000666static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667string_getsize(register PyObject *op)
668{
669 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000670 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000671 if (PyString_AsStringAndSize(op, &s, &len))
672 return -1;
673 return len;
674}
675
676static /*const*/ char *
677string_getbuffer(register PyObject *op)
678{
679 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000680 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000681 if (PyString_AsStringAndSize(op, &s, &len))
682 return NULL;
683 return s;
684}
685
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000687PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000688{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000689 if (!PyString_Check(op))
690 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000691 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000692}
693
694/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000695PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697 if (!PyString_Check(op))
698 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000699 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700}
701
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702int
703PyString_AsStringAndSize(register PyObject *obj,
704 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000705 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000706{
707 if (s == NULL) {
708 PyErr_BadInternalCall();
709 return -1;
710 }
711
712 if (!PyString_Check(obj)) {
713 if (PyUnicode_Check(obj)) {
714 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
715 if (obj == NULL)
716 return -1;
717 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000718 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000719 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000720 PyErr_Format(PyExc_TypeError,
Guido van Rossum8d30cc02007-05-03 17:49:24 +0000721 "expected str object, "
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000722 "%.200s found", obj->ob_type->tp_name);
723 return -1;
724 }
725 }
726
727 *s = PyString_AS_STRING(obj);
728 if (len != NULL)
729 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000730 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731 PyErr_SetString(PyExc_TypeError,
732 "expected string without null bytes");
733 return -1;
734 }
735 return 0;
736}
737
Thomas Wouters477c8d52006-05-27 19:21:47 +0000738/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000739/* Methods */
740
/* STRINGLIB_* settings mapped onto the 8-bit string type.  Presumably
   these parameterise the stringlib headers included after them -- TODO
   confirm against stringlib. */
#define STRINGLIB_CHAR char

#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING

/* the shared empty string object cached by this file */
#define STRINGLIB_EMPTY nullstring
749
750#include "stringlib/fastsearch.h"
751
752#include "stringlib/count.h"
753#include "stringlib/find.h"
754#include "stringlib/partition.h"
755
756
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000757static int
Fred Drakeba096332000-07-09 07:04:36 +0000758string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000759{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000760 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000761 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000762 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000763
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000764 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000765 if (! PyString_CheckExact(op)) {
766 int ret;
767 /* A str subclass may have its own __str__ method. */
768 op = (PyStringObject *) PyObject_Str((PyObject *)op);
769 if (op == NULL)
770 return -1;
771 ret = string_print(op, fp, flags);
772 Py_DECREF(op);
773 return ret;
774 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000775 if (flags & Py_PRINT_RAW) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000776 char *data = op->ob_sval;
777 Py_ssize_t size = op->ob_size;
778 while (size > INT_MAX) {
779 /* Very long strings cannot be written atomically.
780 * But don't write exactly INT_MAX bytes at a time
781 * to avoid memory aligment issues.
782 */
783 const int chunk_size = INT_MAX & ~0x3FFF;
784 fwrite(data, 1, chunk_size, fp);
785 data += chunk_size;
786 size -= chunk_size;
787 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000788#ifdef __VMS
Thomas Wouters89f507f2006-12-13 04:49:30 +0000789 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000790#else
Thomas Wouters89f507f2006-12-13 04:49:30 +0000791 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000792#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000793 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795
Thomas Wouters7e474022000-07-16 12:04:32 +0000796 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000797 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000798 if (memchr(op->ob_sval, '\'', op->ob_size) &&
799 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000800 quote = '"';
801
802 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000803 for (i = 0; i < op->ob_size; i++) {
804 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000805 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000806 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000807 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000808 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000809 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000810 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000811 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000812 fprintf(fp, "\\r");
813 else if (c < ' ' || c >= 0x7f)
814 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000815 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000816 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000818 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000819 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000820}
821
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000822PyObject *
823PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000824{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000825 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000826 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000827 PyObject *v;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000828 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000829 PyErr_SetString(PyExc_OverflowError,
830 "string is too large to make repr");
831 }
832 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000833 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000834 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 }
836 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000837 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838 register char c;
839 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000840 int quote;
841
Thomas Wouters7e474022000-07-16 12:04:32 +0000842 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000843 quote = '\'';
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000844 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000845 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000846 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000847 quote = '"';
848
Tim Peters9161c8b2001-12-03 01:55:38 +0000849 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000850 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000852 /* There's at least enough room for a hex escape
853 and a closing quote. */
854 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000856 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000858 else if (c == '\t')
859 *p++ = '\\', *p++ = 't';
860 else if (c == '\n')
861 *p++ = '\\', *p++ = 'n';
862 else if (c == '\r')
863 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000864 else if (c < ' ' || c >= 0x7f) {
865 /* For performance, we don't want to call
866 PyOS_snprintf here (extra layers of
867 function call). */
868 sprintf(p, "\\x%02x", c & 0xff);
869 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000870 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000871 else
872 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000874 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000875 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000876 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000877 _PyString_Resize(
Thomas Woutersd4ec0c32006-04-21 16:44:05 +0000878 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000879 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881}
882
Guido van Rossum189f1df2001-05-01 16:51:53 +0000883static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000884string_repr(PyObject *op)
885{
886 return PyString_Repr(op, 1);
887}
888
889static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000890string_str(PyObject *s)
891{
Tim Petersc9933152001-10-16 20:18:24 +0000892 assert(PyString_Check(s));
893 if (PyString_CheckExact(s)) {
894 Py_INCREF(s);
895 return s;
896 }
897 else {
898 /* Subtype -- return genuine string with the same value. */
899 PyStringObject *t = (PyStringObject *) s;
900 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
901 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000902}
903
Martin v. Löwis18e16552006-02-15 17:27:45 +0000904static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000905string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000906{
907 return a->ob_size;
908}
909
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000910static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000911string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000913 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 register PyStringObject *op;
915 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000916 if (PyUnicode_Check(bb))
917 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000918 if (PyBytes_Check(bb))
919 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000920 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000921 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000922 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923 return NULL;
924 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000926 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000927 if ((a->ob_size == 0 || b->ob_size == 0) &&
928 PyString_CheckExact(a) && PyString_CheckExact(b)) {
929 if (a->ob_size == 0) {
930 Py_INCREF(bb);
931 return bb;
932 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000933 Py_INCREF(a);
934 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000935 }
936 size = a->ob_size + b->ob_size;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000937 if (size < 0) {
938 PyErr_SetString(PyExc_OverflowError,
939 "strings are too large to concat");
940 return NULL;
941 }
942
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000943 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000944 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000945 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000947 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000948 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000949 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000950 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
951 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000952 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000953 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954#undef b
955}
956
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000958string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000959{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000960 register Py_ssize_t i;
961 register Py_ssize_t j;
962 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000963 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000964 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 if (n < 0)
966 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000967 /* watch out for overflows: the size can overflow int,
968 * and the # of bytes needed can overflow size_t
969 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000970 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000971 if (n && size / n != a->ob_size) {
972 PyErr_SetString(PyExc_OverflowError,
973 "repeated string is too long");
974 return NULL;
975 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000976 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 Py_INCREF(a);
978 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000979 }
Tim Peterse7c05322004-06-27 17:24:49 +0000980 nbytes = (size_t)size;
981 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000982 PyErr_SetString(PyExc_OverflowError,
983 "repeated string is too long");
984 return NULL;
985 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000986 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000987 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000988 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000990 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000991 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000992 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000993 op->ob_sval[size] = '\0';
994 if (a->ob_size == 1 && n > 0) {
995 memset(op->ob_sval, a->ob_sval[0] , n);
996 return (PyObject *) op;
997 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000998 i = 0;
999 if (i < size) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001000 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001001 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001002 }
1003 while (i < size) {
1004 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001005 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001006 i += j;
1007 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001009}
1010
1011/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1012
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001013static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001014string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001015 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001016 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001017{
1018 if (i < 0)
1019 i = 0;
1020 if (j < 0)
1021 j = 0; /* Avoid signed/unsigned bug in next line */
1022 if (j > a->ob_size)
1023 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001024 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1025 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001026 Py_INCREF(a);
1027 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028 }
1029 if (j < i)
1030 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001031 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001032}
1033
Guido van Rossum9284a572000-03-07 15:53:43 +00001034static int
Thomas Wouters477c8d52006-05-27 19:21:47 +00001035string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001036{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001037 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001038 if (PyUnicode_Check(sub_obj))
1039 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001040 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001041 PyErr_Format(PyExc_TypeError,
1042 "'in <string>' requires string as left operand, "
1043 "not %.200s", sub_obj->ob_type->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001044 return -1;
1045 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001046 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001047
Thomas Wouters477c8d52006-05-27 19:21:47 +00001048 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001049}
1050
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001051static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001052string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001053{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001054 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001056 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001057 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001058 return NULL;
1059 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001060 pchar = a->ob_sval[i];
1061 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001062 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001063 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001064 else {
1065#ifdef COUNT_ALLOCS
1066 one_strings++;
1067#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001068 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001069 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001070 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001071}
1072
Martin v. Löwiscd353062001-05-24 16:56:35 +00001073static PyObject*
1074string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001075{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001076 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001077 Py_ssize_t len_a, len_b;
1078 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001079 PyObject *result;
1080
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001081 /* Make sure both arguments are strings. */
1082 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001083 result = Py_NotImplemented;
1084 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001085 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001086 if (a == b) {
1087 switch (op) {
1088 case Py_EQ:case Py_LE:case Py_GE:
1089 result = Py_True;
1090 goto out;
1091 case Py_NE:case Py_LT:case Py_GT:
1092 result = Py_False;
1093 goto out;
1094 }
1095 }
1096 if (op == Py_EQ) {
1097 /* Supporting Py_NE here as well does not save
1098 much time, since Py_NE is rarely used. */
1099 if (a->ob_size == b->ob_size
1100 && (a->ob_sval[0] == b->ob_sval[0]
Thomas Wouters27d517b2007-02-25 20:39:11 +00001101 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001102 result = Py_True;
1103 } else {
1104 result = Py_False;
1105 }
1106 goto out;
1107 }
1108 len_a = a->ob_size; len_b = b->ob_size;
1109 min_len = (len_a < len_b) ? len_a : len_b;
1110 if (min_len > 0) {
1111 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1112 if (c==0)
1113 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001114 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001115 c = 0;
1116 if (c == 0)
1117 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1118 switch (op) {
1119 case Py_LT: c = c < 0; break;
1120 case Py_LE: c = c <= 0; break;
1121 case Py_EQ: assert(0); break; /* unreachable */
1122 case Py_NE: c = c != 0; break;
1123 case Py_GT: c = c > 0; break;
1124 case Py_GE: c = c >= 0; break;
1125 default:
1126 result = Py_NotImplemented;
1127 goto out;
1128 }
1129 result = c ? Py_True : Py_False;
1130 out:
1131 Py_INCREF(result);
1132 return result;
1133}
1134
1135int
1136_PyString_Eq(PyObject *o1, PyObject *o2)
1137{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001138 PyStringObject *a = (PyStringObject*) o1;
1139 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001140 return a->ob_size == b->ob_size
1141 && *a->ob_sval == *b->ob_sval
1142 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001143}
1144
Guido van Rossum9bfef441993-03-29 10:43:31 +00001145static long
Fred Drakeba096332000-07-09 07:04:36 +00001146string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001147{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001148 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001149 register unsigned char *p;
1150 register long x;
1151
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001152 if (a->ob_shash != -1)
1153 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001154 len = a->ob_size;
1155 p = (unsigned char *) a->ob_sval;
1156 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001157 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001158 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001159 x ^= a->ob_size;
1160 if (x == -1)
1161 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001162 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001163 return x;
1164}
1165
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001166static PyObject*
1167string_subscript(PyStringObject* self, PyObject* item)
1168{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001169 if (PyIndex_Check(item)) {
1170 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001171 if (i == -1 && PyErr_Occurred())
1172 return NULL;
1173 if (i < 0)
1174 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001175 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001176 }
1177 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001178 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001179 char* source_buf;
1180 char* result_buf;
1181 PyObject* result;
1182
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001183 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001184 PyString_GET_SIZE(self),
1185 &start, &stop, &step, &slicelength) < 0) {
1186 return NULL;
1187 }
1188
1189 if (slicelength <= 0) {
1190 return PyString_FromStringAndSize("", 0);
1191 }
1192 else {
1193 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001194 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001195 if (result_buf == NULL)
1196 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001197
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001198 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001199 cur += step, i++) {
1200 result_buf[i] = source_buf[cur];
1201 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001202
1203 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204 slicelength);
1205 PyMem_Free(result_buf);
1206 return result;
1207 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001208 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001210 PyErr_Format(PyExc_TypeError,
1211 "string indices must be integers, not %.200s",
1212 item->ob_type->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001213 return NULL;
1214 }
1215}
1216
Martin v. Löwis18e16552006-02-15 17:27:45 +00001217static Py_ssize_t
1218string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001219{
1220 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001221 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001222 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001223 return -1;
1224 }
1225 *ptr = (void *)self->ob_sval;
1226 return self->ob_size;
1227}
1228
Martin v. Löwis18e16552006-02-15 17:27:45 +00001229static Py_ssize_t
1230string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001231{
Guido van Rossum045e6881997-09-08 18:30:11 +00001232 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001233 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001234 return -1;
1235}
1236
Martin v. Löwis18e16552006-02-15 17:27:45 +00001237static Py_ssize_t
1238string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001239{
1240 if ( lenp )
1241 *lenp = self->ob_size;
1242 return 1;
1243}
1244
Martin v. Löwis18e16552006-02-15 17:27:45 +00001245static Py_ssize_t
1246string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001247{
1248 if ( index != 0 ) {
1249 PyErr_SetString(PyExc_SystemError,
1250 "accessing non-existent string segment");
1251 return -1;
1252 }
1253 *ptr = self->ob_sval;
1254 return self->ob_size;
1255}
1256
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001257static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001258 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001259 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001260 (ssizeargfunc)string_repeat, /*sq_repeat*/
1261 (ssizeargfunc)string_item, /*sq_item*/
1262 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001263 0, /*sq_ass_item*/
1264 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001265 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001266};
1267
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001268static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001269 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001270 (binaryfunc)string_subscript,
1271 0,
1272};
1273
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001275 (readbufferproc)string_buffer_getreadbuf,
1276 (writebufferproc)string_buffer_getwritebuf,
1277 (segcountproc)string_buffer_getsegcount,
1278 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001279};
1280
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281
1282
/* Strip-kind selectors; they double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* The bare method name: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001291
Thomas Wouters477c8d52006-05-27 19:21:47 +00001292
/* True iff the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly before memcmp checks the bytes after the first.
   Every argument is parenthesized so expression arguments expand
   correctly; arguments are still evaluated more than once.

   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1298
1299
1300/* Overallocate the initial list to reduce the number of reallocs for small
1301 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1302 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1303 text (roughly 11 words per line) and field delimited data (usually 1-10
1304 fields). For large strings the split algorithms are bandwidth limited
1305 so increasing the preallocation likely will not improve things.*/
1306
/* Upper bound on the number of list slots allocated up front. */
#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits:
   maxsplit+1, capped at MAX_PREALLOC.  The argument is parenthesized so
   expression arguments expand correctly (it is evaluated twice).
   5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1312
/* Create the string data[left:right] and append it to `list`.
   Relies on names in the invoking function: the variables `str` and
   `list`, and the label `onError`, which is jumped to on any failure.
   The reference held in `str` is released whether or not the append
   succeeds.  The body is wrapped in braces so the macro behaves as one
   statement, e.g. when used as the body of an `if`. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str); }
1324
/* Create the string data[left:right] and add it to `list` as element
   number `count`, then increment `count`.  While `count` is below
   MAX_PREALLOC the string is stored directly with PyList_SET_ITEM,
   which keeps the reference; afterwards it is appended with
   PyList_Append and the local reference is dropped.
   Relies on names in the invoking function: the variables `str`, `list`
   and `count`, and the label `onError`, jumped to on any failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    else if (PyList_Append(list, str)) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else {                                                      \
        Py_DECREF(str);                                         \
    }                                                           \
    count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341
/* Always force the list to the expected size: overwrite the list's
   ob_size with the invoking function's `count` variable. */
#define FIX_PREALLOC_SIZE(list) \
    ((PyListObject *)list)->ob_size = count
Thomas Wouters477c8d52006-05-27 19:21:47 +00001344
/* Cursor helpers over the byte string `s`; each one modifies the index
   variable `i` in place.  The forward forms stop at `len`; the reverse
   forms stop once `i` drops below 0. */

/* Advance i past a run of whitespace. */
#define SKIP_SPACE(s, i, len) \
    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }

/* Advance i past a run of non-whitespace. */
#define SKIP_NONSPACE(s, i, len) \
    { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }

/* Move i backwards past a run of whitespace. */
#define RSKIP_SPACE(s, i) \
    { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }

/* Move i backwards past a run of non-whitespace. */
#define RSKIP_NONSPACE(s, i) \
    { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1349
1350Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001351split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001353 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001354 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001355 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001356
1357 if (list == NULL)
1358 return NULL;
1359
Thomas Wouters477c8d52006-05-27 19:21:47 +00001360 i = j = 0;
1361
1362 while (maxsplit-- > 0) {
1363 SKIP_SPACE(s, i, len);
1364 if (i==len) break;
1365 j = i; i++;
1366 SKIP_NONSPACE(s, i, len);
1367 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001368 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001369
1370 if (i < len) {
1371 /* Only occurs when maxsplit was reached */
1372 /* Skip any remaining whitespace and copy to end of string */
1373 SKIP_SPACE(s, i, len);
1374 if (i != len)
1375 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001376 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001377 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001379 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001380 Py_DECREF(list);
1381 return NULL;
1382}
1383
Thomas Wouters477c8d52006-05-27 19:21:47 +00001384Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001385split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001386{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001387 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001388 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001389 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001390
1391 if (list == NULL)
1392 return NULL;
1393
Thomas Wouters477c8d52006-05-27 19:21:47 +00001394 i = j = 0;
1395 while ((j < len) && (maxcount-- > 0)) {
1396 for(; j<len; j++) {
1397 /* I found that using memchr makes no difference */
1398 if (s[j] == ch) {
1399 SPLIT_ADD(s, i, j);
1400 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001401 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001402 }
1403 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001404 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001405 if (i <= len) {
1406 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001407 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001408 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001409 return list;
1410
1411 onError:
1412 Py_DECREF(list);
1413 return NULL;
1414}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001416PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417"S.split([sep [,maxsplit]]) -> list of strings\n\
1418\n\
1419Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001420delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001421splits are done. If sep is not specified or is None, any\n\
1422whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423
1424static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001425string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001427 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001428 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001429 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001430 PyObject *list, *str, *subobj = Py_None;
1431#ifdef USE_FAST
1432 Py_ssize_t pos;
1433#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001434
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001435 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001437 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001438 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001439 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001441 if (PyString_Check(subobj)) {
1442 sub = PyString_AS_STRING(subobj);
1443 n = PyString_GET_SIZE(subobj);
1444 }
1445 else if (PyUnicode_Check(subobj))
1446 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1447 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1448 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001449
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450 if (n == 0) {
1451 PyErr_SetString(PyExc_ValueError, "empty separator");
1452 return NULL;
1453 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001454 else if (n == 1)
1455 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456
Thomas Wouters477c8d52006-05-27 19:21:47 +00001457 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458 if (list == NULL)
1459 return NULL;
1460
Thomas Wouters477c8d52006-05-27 19:21:47 +00001461#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001462 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001463 while (maxsplit-- > 0) {
1464 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1465 if (pos < 0)
1466 break;
1467 j = i+pos;
1468 SPLIT_ADD(s, i, j);
1469 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001471#else
1472 i = j = 0;
1473 while ((j+n <= len) && (maxsplit-- > 0)) {
1474 for (; j+n <= len; j++) {
1475 if (Py_STRING_MATCH(s, j, sub, n)) {
1476 SPLIT_ADD(s, i, j);
1477 i = j = j + n;
1478 break;
1479 }
1480 }
1481 }
1482#endif
1483 SPLIT_ADD(s, i, len);
1484 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001485 return list;
1486
Thomas Wouters477c8d52006-05-27 19:21:47 +00001487 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488 Py_DECREF(list);
1489 return NULL;
1490}
1491
Thomas Wouters477c8d52006-05-27 19:21:47 +00001492PyDoc_STRVAR(partition__doc__,
1493"S.partition(sep) -> (head, sep, tail)\n\
1494\n\
1495Searches for the separator sep in S, and returns the part before it,\n\
1496the separator itself, and the part after it. If the separator is not\n\
1497found, returns S and two empty strings.");
1498
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001499static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001500string_partition(PyStringObject *self, PyObject *sep_obj)
1501{
1502 const char *sep;
1503 Py_ssize_t sep_len;
1504
1505 if (PyString_Check(sep_obj)) {
1506 sep = PyString_AS_STRING(sep_obj);
1507 sep_len = PyString_GET_SIZE(sep_obj);
1508 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001509 else if (PyUnicode_Check(sep_obj))
1510 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001511 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1512 return NULL;
1513
1514 return stringlib_partition(
1515 (PyObject*) self,
1516 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1517 sep_obj, sep, sep_len
1518 );
1519}
1520
1521PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001522"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001523\n\
1524Searches for the separator sep in S, starting at the end of S, and returns\n\
1525the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001526separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001527
1528static PyObject *
1529string_rpartition(PyStringObject *self, PyObject *sep_obj)
1530{
1531 const char *sep;
1532 Py_ssize_t sep_len;
1533
1534 if (PyString_Check(sep_obj)) {
1535 sep = PyString_AS_STRING(sep_obj);
1536 sep_len = PyString_GET_SIZE(sep_obj);
1537 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001538 else if (PyUnicode_Check(sep_obj))
1539 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001540 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1541 return NULL;
1542
1543 return stringlib_rpartition(
1544 (PyObject*) self,
1545 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1546 sep_obj, sep, sep_len
1547 );
1548}
1549
1550Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001551rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001552{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001553 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001554 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001555 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001556
1557 if (list == NULL)
1558 return NULL;
1559
Thomas Wouters477c8d52006-05-27 19:21:47 +00001560 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001561
Thomas Wouters477c8d52006-05-27 19:21:47 +00001562 while (maxsplit-- > 0) {
1563 RSKIP_SPACE(s, i);
1564 if (i<0) break;
1565 j = i; i--;
1566 RSKIP_NONSPACE(s, i);
1567 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001568 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001569 if (i >= 0) {
1570 /* Only occurs when maxsplit was reached */
1571 /* Skip any remaining whitespace and copy to beginning of string */
1572 RSKIP_SPACE(s, i);
1573 if (i >= 0)
1574 SPLIT_ADD(s, 0, i + 1);
1575
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001576 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001577 FIX_PREALLOC_SIZE(list);
1578 if (PyList_Reverse(list) < 0)
1579 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001580 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001581 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001582 Py_DECREF(list);
1583 return NULL;
1584}
1585
Thomas Wouters477c8d52006-05-27 19:21:47 +00001586Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001587rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001588{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001589 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001590 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001591 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001592
1593 if (list == NULL)
1594 return NULL;
1595
Thomas Wouters477c8d52006-05-27 19:21:47 +00001596 i = j = len - 1;
1597 while ((i >= 0) && (maxcount-- > 0)) {
1598 for (; i >= 0; i--) {
1599 if (s[i] == ch) {
1600 SPLIT_ADD(s, i + 1, j + 1);
1601 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001602 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001603 }
1604 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001605 }
1606 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001607 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001608 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001609 FIX_PREALLOC_SIZE(list);
1610 if (PyList_Reverse(list) < 0)
1611 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001612 return list;
1613
1614 onError:
1615 Py_DECREF(list);
1616 return NULL;
1617}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001618
1619PyDoc_STRVAR(rsplit__doc__,
1620"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1621\n\
1622Return a list of the words in the string S, using sep as the\n\
1623delimiter string, starting at the end of the string and working\n\
1624to the front. If maxsplit is given, at most maxsplit splits are\n\
1625done. If sep is not specified or is None, any whitespace string\n\
1626is a separator.");
1627
1628static PyObject *
1629string_rsplit(PyStringObject *self, PyObject *args)
1630{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001631 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001632 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001633 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001634 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001635
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001636 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001637 return NULL;
1638 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001639 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001640 if (subobj == Py_None)
1641 return rsplit_whitespace(s, len, maxsplit);
1642 if (PyString_Check(subobj)) {
1643 sub = PyString_AS_STRING(subobj);
1644 n = PyString_GET_SIZE(subobj);
1645 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001646 else if (PyUnicode_Check(subobj))
1647 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001648 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1649 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001650
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001651 if (n == 0) {
1652 PyErr_SetString(PyExc_ValueError, "empty separator");
1653 return NULL;
1654 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001655 else if (n == 1)
1656 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001657
Thomas Wouters477c8d52006-05-27 19:21:47 +00001658 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001659 if (list == NULL)
1660 return NULL;
1661
1662 j = len;
1663 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001664
Thomas Wouters477c8d52006-05-27 19:21:47 +00001665 while ( (i >= 0) && (maxsplit-- > 0) ) {
1666 for (; i>=0; i--) {
1667 if (Py_STRING_MATCH(s, i, sub, n)) {
1668 SPLIT_ADD(s, i + n, j);
1669 j = i;
1670 i -= n;
1671 break;
1672 }
1673 }
1674 }
1675 SPLIT_ADD(s, 0, j);
1676 FIX_PREALLOC_SIZE(list);
1677 if (PyList_Reverse(list) < 0)
1678 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679 return list;
1680
Thomas Wouters477c8d52006-05-27 19:21:47 +00001681onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001682 Py_DECREF(list);
1683 return NULL;
1684}
1685
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001686
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001687PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688"S.join(sequence) -> string\n\
1689\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001690Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001691sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692
1693static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001694string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695{
1696 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001697 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001698 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001699 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001700 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001701 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001702 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001703 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704
Tim Peters19fe14e2001-01-19 03:03:47 +00001705 seq = PySequence_Fast(orig, "");
1706 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001707 return NULL;
1708 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001709
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001710 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001711 if (seqlen == 0) {
1712 Py_DECREF(seq);
1713 return PyString_FromString("");
1714 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001715 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001716 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001717 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1718 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001719 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001720 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001721 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001723
Raymond Hettinger674f2412004-08-23 23:23:54 +00001724 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001725 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001726 * Do a pre-pass to figure out the total amount of space we'll
1727 * need (sz), see whether any argument is absurd, and defer to
1728 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001729 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001730 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001731 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001732 item = PySequence_Fast_GET_ITEM(seq, i);
1733 if (!PyString_Check(item)){
1734 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001735 /* Defer to Unicode join.
1736 * CAUTION: There's no gurantee that the
1737 * original sequence can be iterated over
1738 * again, so we must pass seq here.
1739 */
1740 PyObject *result;
1741 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001742 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001743 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001744 }
1745 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001746 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001747 " %.80s found",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001748 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001749 Py_DECREF(seq);
1750 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001751 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001752 sz += PyString_GET_SIZE(item);
1753 if (i != 0)
1754 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001755 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001756 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001757 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001758 Py_DECREF(seq);
1759 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001761 }
1762
1763 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001764 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001765 if (res == NULL) {
1766 Py_DECREF(seq);
1767 return NULL;
1768 }
1769
1770 /* Catenate everything. */
1771 p = PyString_AS_STRING(res);
1772 for (i = 0; i < seqlen; ++i) {
1773 size_t n;
1774 item = PySequence_Fast_GET_ITEM(seq, i);
1775 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001776 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001777 p += n;
1778 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001779 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001780 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001781 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001783
Jeremy Hylton49048292000-07-11 03:28:17 +00001784 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786}
1787
Tim Peters52e155e2001-06-16 05:42:57 +00001788PyObject *
1789_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001790{
Tim Petersa7259592001-06-16 05:11:17 +00001791 assert(sep != NULL && PyString_Check(sep));
1792 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001793 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001794}
1795
Thomas Wouters477c8d52006-05-27 19:21:47 +00001796Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001797string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001798{
1799 if (*end > len)
1800 *end = len;
1801 else if (*end < 0)
1802 *end += len;
1803 if (*end < 0)
1804 *end = 0;
1805 if (*start < 0)
1806 *start += len;
1807 if (*start < 0)
1808 *start = 0;
1809}
1810
Thomas Wouters477c8d52006-05-27 19:21:47 +00001811Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001812string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001814 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001815 const char *sub;
1816 Py_ssize_t sub_len;
1817 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818
Thomas Wouters477c8d52006-05-27 19:21:47 +00001819 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1820 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001821 return -2;
1822 if (PyString_Check(subobj)) {
1823 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001824 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001825 }
1826 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001827 return PyUnicode_Find(
1828 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001829 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001830 /* XXX - the "expected a character buffer object" is pretty
1831 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832 return -2;
1833
Thomas Wouters477c8d52006-05-27 19:21:47 +00001834 if (dir > 0)
1835 return stringlib_find_slice(
1836 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1837 sub, sub_len, start, end);
1838 else
1839 return stringlib_rfind_slice(
1840 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1841 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842}
1843
1844
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001845PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846"S.find(sub [,start [,end]]) -> int\n\
1847\n\
1848Return the lowest index in S where substring sub is found,\n\
1849such that sub is contained within s[start,end]. Optional\n\
1850arguments start and end are interpreted as in slice notation.\n\
1851\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001852Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853
1854static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001855string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001857 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858 if (result == -2)
1859 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001860 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861}
1862
1863
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001864PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865"S.index(sub [,start [,end]]) -> int\n\
1866\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001867Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001868
1869static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001870string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001871{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001872 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001873 if (result == -2)
1874 return NULL;
1875 if (result == -1) {
1876 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001877 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878 return NULL;
1879 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001880 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001881}
1882
1883
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001884PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001885"S.rfind(sub [,start [,end]]) -> int\n\
1886\n\
1887Return the highest index in S where substring sub is found,\n\
1888such that sub is contained within s[start,end]. Optional\n\
1889arguments start and end are interpreted as in slice notation.\n\
1890\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001891Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892
1893static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001894string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001896 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897 if (result == -2)
1898 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001899 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900}
1901
1902
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001903PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904"S.rindex(sub [,start [,end]]) -> int\n\
1905\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001906Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907
1908static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001909string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001911 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912 if (result == -2)
1913 return NULL;
1914 if (result == -1) {
1915 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001916 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917 return NULL;
1918 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001919 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920}
1921
1922
Thomas Wouters477c8d52006-05-27 19:21:47 +00001923Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001924do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1925{
1926 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001927 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001928 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001929 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1930 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001931
1932 i = 0;
1933 if (striptype != RIGHTSTRIP) {
1934 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1935 i++;
1936 }
1937 }
1938
1939 j = len;
1940 if (striptype != LEFTSTRIP) {
1941 do {
1942 j--;
1943 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1944 j++;
1945 }
1946
1947 if (i == 0 && j == len && PyString_CheckExact(self)) {
1948 Py_INCREF(self);
1949 return (PyObject*)self;
1950 }
1951 else
1952 return PyString_FromStringAndSize(s+i, j-i);
1953}
1954
1955
Thomas Wouters477c8d52006-05-27 19:21:47 +00001956Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001957do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958{
1959 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001960 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962 i = 0;
1963 if (striptype != RIGHTSTRIP) {
1964 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1965 i++;
1966 }
1967 }
1968
1969 j = len;
1970 if (striptype != LEFTSTRIP) {
1971 do {
1972 j--;
1973 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1974 j++;
1975 }
1976
Tim Peters8fa5dd02001-09-12 02:18:30 +00001977 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978 Py_INCREF(self);
1979 return (PyObject*)self;
1980 }
1981 else
1982 return PyString_FromStringAndSize(s+i, j-i);
1983}
1984
1985
Thomas Wouters477c8d52006-05-27 19:21:47 +00001986Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001987do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1988{
1989 PyObject *sep = NULL;
1990
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001991 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001992 return NULL;
1993
1994 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001995 if (PyString_Check(sep))
1996 return do_xstrip(self, striptype, sep);
1997 else if (PyUnicode_Check(sep)) {
1998 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1999 PyObject *res;
2000 if (uniself==NULL)
2001 return NULL;
2002 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2003 striptype, sep);
2004 Py_DECREF(uniself);
2005 return res;
2006 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002007 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002008 "%s arg must be None or str",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002009 STRIPNAME(striptype));
2010 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002011 }
2012
2013 return do_strip(self, striptype);
2014}
2015
2016
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002017PyDoc_STRVAR(strip__doc__,
Guido van Rossum8d30cc02007-05-03 17:49:24 +00002018"S.strip([chars]) -> str\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019\n\
2020Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002021whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002022If chars is given and not None, remove characters in chars instead.\n\
2023If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024
2025static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002026string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002028 if (PyTuple_GET_SIZE(args) == 0)
2029 return do_strip(self, BOTHSTRIP); /* Common case */
2030 else
2031 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032}
2033
2034
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002035PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum8d30cc02007-05-03 17:49:24 +00002036"S.lstrip([chars]) -> str\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002038Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002039If chars is given and not None, remove characters in chars instead.\n\
2040If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041
2042static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002043string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002045 if (PyTuple_GET_SIZE(args) == 0)
2046 return do_strip(self, LEFTSTRIP); /* Common case */
2047 else
2048 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002049}
2050
2051
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002052PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum8d30cc02007-05-03 17:49:24 +00002053"S.rstrip([chars]) -> str\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002055Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002056If chars is given and not None, remove characters in chars instead.\n\
2057If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058
2059static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002060string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002062 if (PyTuple_GET_SIZE(args) == 0)
2063 return do_strip(self, RIGHTSTRIP); /* Common case */
2064 else
2065 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066}
2067
2068
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002069PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070"S.lower() -> string\n\
2071\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002072Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002073
/* _tolower and _toupper come from SUSv2 rather than ISO C, so fall back
   to the standard tolower() when the platform does not provide the macro. */
#if !defined(_tolower)
#define _tolower tolower
#endif
2078
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002080string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002082 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002083 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002084 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002086 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002087 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002089
2090 s = PyString_AS_STRING(newobj);
2091
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002092 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002093
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002095 int c = Py_CHARMASK(s[i]);
2096 if (isupper(c))
2097 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002098 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002099
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002100 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101}
2102
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002103PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104"S.upper() -> string\n\
2105\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002106Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107
/* Fall back to the standard toupper() when _toupper is not provided. */
#if !defined(_toupper)
#define _toupper toupper
#endif
2111
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002113string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002115 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002116 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002117 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002119 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002120 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002122
2123 s = PyString_AS_STRING(newobj);
2124
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002125 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002126
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002128 int c = Py_CHARMASK(s[i]);
2129 if (islower(c))
2130 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002132
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002133 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134}
2135
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002136PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002137"S.title() -> string\n\
2138\n\
2139Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002140characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002141
2142static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002143string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002144{
2145 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002146 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002147 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002148 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002149
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002150 newobj = PyString_FromStringAndSize(NULL, n);
2151 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002152 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002153 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002154 for (i = 0; i < n; i++) {
2155 int c = Py_CHARMASK(*s++);
2156 if (islower(c)) {
2157 if (!previous_is_cased)
2158 c = toupper(c);
2159 previous_is_cased = 1;
2160 } else if (isupper(c)) {
2161 if (previous_is_cased)
2162 c = tolower(c);
2163 previous_is_cased = 1;
2164 } else
2165 previous_is_cased = 0;
2166 *s_new++ = c;
2167 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002168 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002169}
2170
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002171PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172"S.capitalize() -> string\n\
2173\n\
2174Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002175capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176
2177static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002178string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179{
2180 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002181 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002182 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002184 newobj = PyString_FromStringAndSize(NULL, n);
2185 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002187 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188 if (0 < n) {
2189 int c = Py_CHARMASK(*s++);
2190 if (islower(c))
2191 *s_new = toupper(c);
2192 else
2193 *s_new = c;
2194 s_new++;
2195 }
2196 for (i = 1; i < n; i++) {
2197 int c = Py_CHARMASK(*s++);
2198 if (isupper(c))
2199 *s_new = tolower(c);
2200 else
2201 *s_new = c;
2202 s_new++;
2203 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002204 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205}
2206
2207
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002208PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209"S.count(sub[, start[, end]]) -> int\n\
2210\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002211Return the number of non-overlapping occurrences of substring sub in\n\
2212string S[start:end]. Optional arguments start and end are interpreted\n\
2213as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214
2215static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002216string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002218 PyObject *sub_obj;
2219 const char *str = PyString_AS_STRING(self), *sub;
2220 Py_ssize_t sub_len;
2221 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222
Thomas Wouters477c8d52006-05-27 19:21:47 +00002223 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2224 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002226
Thomas Wouters477c8d52006-05-27 19:21:47 +00002227 if (PyString_Check(sub_obj)) {
2228 sub = PyString_AS_STRING(sub_obj);
2229 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002230 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002231 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002232 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002233 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002234 if (count == -1)
2235 return NULL;
2236 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002237 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002238 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002239 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002240 return NULL;
2241
Thomas Wouters477c8d52006-05-27 19:21:47 +00002242 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002243
Thomas Wouters477c8d52006-05-27 19:21:47 +00002244 return PyInt_FromSsize_t(
2245 stringlib_count(str + start, end - start, sub, sub_len)
2246 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247}
2248
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002249PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250"S.swapcase() -> string\n\
2251\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002252Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002253converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254
2255static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002256string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257{
2258 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002259 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002260 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002262 newobj = PyString_FromStringAndSize(NULL, n);
2263 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002265 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002266 for (i = 0; i < n; i++) {
2267 int c = Py_CHARMASK(*s++);
2268 if (islower(c)) {
2269 *s_new = toupper(c);
2270 }
2271 else if (isupper(c)) {
2272 *s_new = tolower(c);
2273 }
2274 else
2275 *s_new = c;
2276 s_new++;
2277 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002278 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002279}
2280
2281
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002282PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002283"S.translate(table [,deletechars]) -> string\n\
2284\n\
2285Return a copy of the string S, where all characters occurring\n\
2286in the optional argument deletechars are removed, and the\n\
2287remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002288translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002289
2290static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002291string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002292{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002294 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002295 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002297 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002298 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299 PyObject *result;
2300 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002303 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306
2307 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002308 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 tablen = PyString_GET_SIZE(tableobj);
2310 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002311 else if (tableobj == Py_None) {
2312 table = NULL;
2313 tablen = 256;
2314 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002315 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002316 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 parameter; instead a mapping to None will cause characters
2318 to be deleted. */
2319 if (delobj != NULL) {
2320 PyErr_SetString(PyExc_TypeError,
2321 "deletions are implemented differently for unicode");
2322 return NULL;
2323 }
2324 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2325 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002326 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328
Martin v. Löwis00b61272002-12-12 20:03:19 +00002329 if (tablen != 256) {
2330 PyErr_SetString(PyExc_ValueError,
2331 "translation table must be 256 characters long");
2332 return NULL;
2333 }
2334
Guido van Rossum4c08d552000-03-10 22:55:18 +00002335 if (delobj != NULL) {
2336 if (PyString_Check(delobj)) {
2337 del_table = PyString_AS_STRING(delobj);
2338 dellen = PyString_GET_SIZE(delobj);
2339 }
2340 else if (PyUnicode_Check(delobj)) {
2341 PyErr_SetString(PyExc_TypeError,
2342 "deletions are implemented differently for unicode");
2343 return NULL;
2344 }
2345 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2346 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002347 }
2348 else {
2349 del_table = NULL;
2350 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351 }
2352
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002353 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002354 result = PyString_FromStringAndSize((char *)NULL, inlen);
2355 if (result == NULL)
2356 return NULL;
2357 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002358 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002359
Guido van Rossumd8faa362007-04-27 19:54:29 +00002360 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361 /* If no deletions are required, use faster code */
2362 for (i = inlen; --i >= 0; ) {
2363 c = Py_CHARMASK(*input++);
2364 if (Py_CHARMASK((*output++ = table[c])) != c)
2365 changed = 1;
2366 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002367 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368 return result;
2369 Py_DECREF(result);
2370 Py_INCREF(input_obj);
2371 return input_obj;
2372 }
2373
Guido van Rossumd8faa362007-04-27 19:54:29 +00002374 if (table == NULL) {
2375 for (i = 0; i < 256; i++)
2376 trans_table[i] = Py_CHARMASK(i);
2377 } else {
2378 for (i = 0; i < 256; i++)
2379 trans_table[i] = Py_CHARMASK(table[i]);
2380 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002381
2382 for (i = 0; i < dellen; i++)
2383 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2384
2385 for (i = inlen; --i >= 0; ) {
2386 c = Py_CHARMASK(*input++);
2387 if (trans_table[c] != -1)
2388 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2389 continue;
2390 changed = 1;
2391 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002392 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002393 Py_DECREF(result);
2394 Py_INCREF(input_obj);
2395 return input_obj;
2396 }
2397 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002398 if (inlen > 0)
2399 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002400 return result;
2401}
2402
2403
/* Search-direction selectors: a positive value scans forward, a negative
   value scans backward.  REVERSE is parenthesized so the macro expands to
   a single primary expression wherever it is used. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a char* to the first byte equal to `c` within the first
   `target_len` bytes of `target`, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411
Thomas Wouters477c8d52006-05-27 19:21:47 +00002412/* String ops must return a string. */
2413/* If the object is subclass of string, create a copy */
2414Py_LOCAL(PyStringObject *)
2415return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002417 if (PyString_CheckExact(self)) {
2418 Py_INCREF(self);
2419 return self;
2420 }
2421 return (PyStringObject *)PyString_FromStringAndSize(
2422 PyString_AS_STRING(self),
2423 PyString_GET_SIZE(self));
2424}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425
Thomas Wouters477c8d52006-05-27 19:21:47 +00002426Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002427countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002428{
2429 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002430 const char *start=target;
2431 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002432
Thomas Wouters477c8d52006-05-27 19:21:47 +00002433 while ( (start=findchar(start, end-start, c)) != NULL ) {
2434 count++;
2435 if (count >= maxcount)
2436 break;
2437 start += 1;
2438 }
2439 return count;
2440}
2441
2442Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002443findstring(const char *target, Py_ssize_t target_len,
2444 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002445 Py_ssize_t start,
2446 Py_ssize_t end,
2447 int direction)
2448{
2449 if (start < 0) {
2450 start += target_len;
2451 if (start < 0)
2452 start = 0;
2453 }
2454 if (end > target_len) {
2455 end = target_len;
2456 } else if (end < 0) {
2457 end += target_len;
2458 if (end < 0)
2459 end = 0;
2460 }
2461
2462 /* zero-length substrings always match at the first attempt */
2463 if (pattern_len == 0)
2464 return (direction > 0) ? start : end;
2465
2466 end -= pattern_len;
2467
2468 if (direction < 0) {
2469 for (; end >= start; end--)
2470 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2471 return end;
2472 } else {
2473 for (; start <= end; start++)
2474 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2475 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476 }
2477 return -1;
2478}
2479
Thomas Wouters477c8d52006-05-27 19:21:47 +00002480Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002481countstring(const char *target, Py_ssize_t target_len,
2482 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002483 Py_ssize_t start,
2484 Py_ssize_t end,
2485 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002486{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002487 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002488
Thomas Wouters477c8d52006-05-27 19:21:47 +00002489 if (start < 0) {
2490 start += target_len;
2491 if (start < 0)
2492 start = 0;
2493 }
2494 if (end > target_len) {
2495 end = target_len;
2496 } else if (end < 0) {
2497 end += target_len;
2498 if (end < 0)
2499 end = 0;
2500 }
2501
2502 /* zero-length substrings match everywhere */
2503 if (pattern_len == 0 || maxcount == 0) {
2504 if (target_len+1 < maxcount)
2505 return target_len+1;
2506 return maxcount;
2507 }
2508
2509 end -= pattern_len;
2510 if (direction < 0) {
2511 for (; (end >= start); end--)
2512 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2513 count++;
2514 if (--maxcount <= 0) break;
2515 end -= pattern_len-1;
2516 }
2517 } else {
2518 for (; (start <= end); start++)
2519 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2520 count++;
2521 if (--maxcount <= 0)
2522 break;
2523 start += pattern_len-1;
2524 }
2525 }
2526 return count;
2527}
2528
2529
2530/* Algorithms for different cases of string replacement */
2531
2532/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2533Py_LOCAL(PyStringObject *)
2534replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002535 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002536 Py_ssize_t maxcount)
2537{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002538 char *self_s, *result_s;
2539 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002540 Py_ssize_t count, i, product;
2541 PyStringObject *result;
2542
2543 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002544
Thomas Wouters477c8d52006-05-27 19:21:47 +00002545 /* 1 at the end plus 1 after every character */
2546 count = self_len+1;
2547 if (maxcount < count)
2548 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002549
Thomas Wouters477c8d52006-05-27 19:21:47 +00002550 /* Check for overflow */
2551 /* result_len = count * to_len + self_len; */
2552 product = count * to_len;
2553 if (product / to_len != count) {
2554 PyErr_SetString(PyExc_OverflowError,
2555 "replace string is too long");
2556 return NULL;
2557 }
2558 result_len = product + self_len;
2559 if (result_len < 0) {
2560 PyErr_SetString(PyExc_OverflowError,
2561 "replace string is too long");
2562 return NULL;
2563 }
2564
2565 if (! (result = (PyStringObject *)
2566 PyString_FromStringAndSize(NULL, result_len)) )
2567 return NULL;
2568
2569 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002570 result_s = PyString_AS_STRING(result);
2571
2572 /* TODO: special case single character, which doesn't need memcpy */
2573
2574 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002575 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002576 result_s += to_len;
2577 count -= 1;
2578
2579 for (i=0; i<count; i++) {
2580 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002581 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002582 result_s += to_len;
2583 }
2584
2585 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002586 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002587
2588 return result;
2589}
2590
2591/* Special case for deleting a single character */
2592/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2593Py_LOCAL(PyStringObject *)
2594replace_delete_single_character(PyStringObject *self,
2595 char from_c, Py_ssize_t maxcount)
2596{
2597 char *self_s, *result_s;
2598 char *start, *next, *end;
2599 Py_ssize_t self_len, result_len;
2600 Py_ssize_t count;
2601 PyStringObject *result;
2602
2603 self_len = PyString_GET_SIZE(self);
2604 self_s = PyString_AS_STRING(self);
2605
2606 count = countchar(self_s, self_len, from_c, maxcount);
2607 if (count == 0) {
2608 return return_self(self);
2609 }
2610
2611 result_len = self_len - count; /* from_len == 1 */
2612 assert(result_len>=0);
2613
2614 if ( (result = (PyStringObject *)
2615 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2616 return NULL;
2617 result_s = PyString_AS_STRING(result);
2618
2619 start = self_s;
2620 end = self_s + self_len;
2621 while (count-- > 0) {
2622 next = findchar(start, end-start, from_c);
2623 if (next == NULL)
2624 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002625 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002626 result_s += (next-start);
2627 start = next+1;
2628 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002629 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002630
Thomas Wouters477c8d52006-05-27 19:21:47 +00002631 return result;
2632}
2633
2634/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2635
2636Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002637replace_delete_substring(PyStringObject *self,
2638 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002639 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002640 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002641 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002642 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002643 Py_ssize_t count, offset;
2644 PyStringObject *result;
2645
2646 self_len = PyString_GET_SIZE(self);
2647 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002648
2649 count = countstring(self_s, self_len,
2650 from_s, from_len,
2651 0, self_len, 1,
2652 maxcount);
2653
2654 if (count == 0) {
2655 /* no matches */
2656 return return_self(self);
2657 }
2658
2659 result_len = self_len - (count * from_len);
2660 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002661
Thomas Wouters477c8d52006-05-27 19:21:47 +00002662 if ( (result = (PyStringObject *)
2663 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2664 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002665
Thomas Wouters477c8d52006-05-27 19:21:47 +00002666 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002667
Thomas Wouters477c8d52006-05-27 19:21:47 +00002668 start = self_s;
2669 end = self_s + self_len;
2670 while (count-- > 0) {
2671 offset = findstring(start, end-start,
2672 from_s, from_len,
2673 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002674 if (offset == -1)
2675 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002676 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002677
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002678 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002679
Thomas Wouters477c8d52006-05-27 19:21:47 +00002680 result_s += (next-start);
2681 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002682 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002683 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002684 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002685}
2686
Thomas Wouters477c8d52006-05-27 19:21:47 +00002687/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2688Py_LOCAL(PyStringObject *)
2689replace_single_character_in_place(PyStringObject *self,
2690 char from_c, char to_c,
2691 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002692{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002693 char *self_s, *result_s, *start, *end, *next;
2694 Py_ssize_t self_len;
2695 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002696
Thomas Wouters477c8d52006-05-27 19:21:47 +00002697 /* The result string will be the same size */
2698 self_s = PyString_AS_STRING(self);
2699 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002700
Thomas Wouters477c8d52006-05-27 19:21:47 +00002701 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002702
Thomas Wouters477c8d52006-05-27 19:21:47 +00002703 if (next == NULL) {
2704 /* No matches; return the original string */
2705 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002706 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002707
Thomas Wouters477c8d52006-05-27 19:21:47 +00002708 /* Need to make a new string */
2709 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2710 if (result == NULL)
2711 return NULL;
2712 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002713 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002714
Thomas Wouters477c8d52006-05-27 19:21:47 +00002715 /* change everything in-place, starting with this one */
2716 start = result_s + (next-self_s);
2717 *start = to_c;
2718 start++;
2719 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002720
Thomas Wouters477c8d52006-05-27 19:21:47 +00002721 while (--maxcount > 0) {
2722 next = findchar(start, end-start, from_c);
2723 if (next == NULL)
2724 break;
2725 *next = to_c;
2726 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002727 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002728
Thomas Wouters477c8d52006-05-27 19:21:47 +00002729 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002730}
2731
Thomas Wouters477c8d52006-05-27 19:21:47 +00002732/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2733Py_LOCAL(PyStringObject *)
2734replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002735 const char *from_s, Py_ssize_t from_len,
2736 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002737 Py_ssize_t maxcount)
2738{
2739 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002740 char *self_s;
2741 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002742 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002743
Thomas Wouters477c8d52006-05-27 19:21:47 +00002744 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002745
Thomas Wouters477c8d52006-05-27 19:21:47 +00002746 self_s = PyString_AS_STRING(self);
2747 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002748
Thomas Wouters477c8d52006-05-27 19:21:47 +00002749 offset = findstring(self_s, self_len,
2750 from_s, from_len,
2751 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002752 if (offset == -1) {
2753 /* No matches; return the original string */
2754 return return_self(self);
2755 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002756
Thomas Wouters477c8d52006-05-27 19:21:47 +00002757 /* Need to make a new string */
2758 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2759 if (result == NULL)
2760 return NULL;
2761 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002762 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002763
Thomas Wouters477c8d52006-05-27 19:21:47 +00002764 /* change everything in-place, starting with this one */
2765 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002766 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002767 start += from_len;
2768 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002769
Thomas Wouters477c8d52006-05-27 19:21:47 +00002770 while ( --maxcount > 0) {
2771 offset = findstring(start, end-start,
2772 from_s, from_len,
2773 0, end-start, FORWARD);
2774 if (offset==-1)
2775 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002776 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002777 start += offset+from_len;
2778 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002779
Thomas Wouters477c8d52006-05-27 19:21:47 +00002780 return result;
2781}
2782
2783/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2784Py_LOCAL(PyStringObject *)
2785replace_single_character(PyStringObject *self,
2786 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002787 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002788 Py_ssize_t maxcount)
2789{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002790 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002791 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002792 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002793 Py_ssize_t count, product;
2794 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002795
Thomas Wouters477c8d52006-05-27 19:21:47 +00002796 self_s = PyString_AS_STRING(self);
2797 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002798
Thomas Wouters477c8d52006-05-27 19:21:47 +00002799 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002800 if (count == 0) {
2801 /* no matches, return unchanged */
2802 return return_self(self);
2803 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002804
Thomas Wouters477c8d52006-05-27 19:21:47 +00002805 /* use the difference between current and new, hence the "-1" */
2806 /* result_len = self_len + count * (to_len-1) */
2807 product = count * (to_len-1);
2808 if (product / (to_len-1) != count) {
2809 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2810 return NULL;
2811 }
2812 result_len = self_len + product;
2813 if (result_len < 0) {
2814 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2815 return NULL;
2816 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002817
Thomas Wouters477c8d52006-05-27 19:21:47 +00002818 if ( (result = (PyStringObject *)
2819 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2820 return NULL;
2821 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002822
Thomas Wouters477c8d52006-05-27 19:21:47 +00002823 start = self_s;
2824 end = self_s + self_len;
2825 while (count-- > 0) {
2826 next = findchar(start, end-start, from_c);
2827 if (next == NULL)
2828 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002829
Thomas Wouters477c8d52006-05-27 19:21:47 +00002830 if (next == start) {
2831 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002832 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002833 result_s += to_len;
2834 start += 1;
2835 } else {
2836 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002837 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002838 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002839 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002840 result_s += to_len;
2841 start = next+1;
2842 }
2843 }
2844 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002845 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002846
Thomas Wouters477c8d52006-05-27 19:21:47 +00002847 return result;
2848}
2849
2850/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2851Py_LOCAL(PyStringObject *)
2852replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002853 const char *from_s, Py_ssize_t from_len,
2854 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002855 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002856 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002857 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002858 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002859 Py_ssize_t count, offset, product;
2860 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002861
Thomas Wouters477c8d52006-05-27 19:21:47 +00002862 self_s = PyString_AS_STRING(self);
2863 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002864
Thomas Wouters477c8d52006-05-27 19:21:47 +00002865 count = countstring(self_s, self_len,
2866 from_s, from_len,
2867 0, self_len, FORWARD, maxcount);
2868 if (count == 0) {
2869 /* no matches, return unchanged */
2870 return return_self(self);
2871 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002872
Thomas Wouters477c8d52006-05-27 19:21:47 +00002873 /* Check for overflow */
2874 /* result_len = self_len + count * (to_len-from_len) */
2875 product = count * (to_len-from_len);
2876 if (product / (to_len-from_len) != count) {
2877 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2878 return NULL;
2879 }
2880 result_len = self_len + product;
2881 if (result_len < 0) {
2882 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2883 return NULL;
2884 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002885
Thomas Wouters477c8d52006-05-27 19:21:47 +00002886 if ( (result = (PyStringObject *)
2887 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2888 return NULL;
2889 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002890
Thomas Wouters477c8d52006-05-27 19:21:47 +00002891 start = self_s;
2892 end = self_s + self_len;
2893 while (count-- > 0) {
2894 offset = findstring(start, end-start,
2895 from_s, from_len,
2896 0, end-start, FORWARD);
2897 if (offset == -1)
2898 break;
2899 next = start+offset;
2900 if (next == start) {
2901 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002902 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002903 result_s += to_len;
2904 start += from_len;
2905 } else {
2906 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002907 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002908 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002909 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002910 result_s += to_len;
2911 start = next+from_len;
2912 }
2913 }
2914 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002915 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002916
Thomas Wouters477c8d52006-05-27 19:21:47 +00002917 return result;
2918}
2919
2920
2921Py_LOCAL(PyStringObject *)
2922replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002923 const char *from_s, Py_ssize_t from_len,
2924 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002925 Py_ssize_t maxcount)
2926{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002927 if (maxcount < 0) {
2928 maxcount = PY_SSIZE_T_MAX;
2929 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2930 /* nothing to do; return the original string */
2931 return return_self(self);
2932 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002933
Thomas Wouters477c8d52006-05-27 19:21:47 +00002934 if (maxcount == 0 ||
2935 (from_len == 0 && to_len == 0)) {
2936 /* nothing to do; return the original string */
2937 return return_self(self);
2938 }
2939
2940 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002941
Thomas Wouters477c8d52006-05-27 19:21:47 +00002942 if (from_len == 0) {
2943 /* insert the 'to' string everywhere. */
2944 /* >>> "Python".replace("", ".") */
2945 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002946 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002947 }
2948
2949 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2950 /* point for an empty self string to generate a non-empty string */
2951 /* Special case so the remaining code always gets a non-empty string */
2952 if (PyString_GET_SIZE(self) == 0) {
2953 return return_self(self);
2954 }
2955
2956 if (to_len == 0) {
2957 /* delete all occurances of 'from' string */
2958 if (from_len == 1) {
2959 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002960 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002961 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002962 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002963 }
2964 }
2965
2966 /* Handle special case where both strings have the same length */
2967
2968 if (from_len == to_len) {
2969 if (from_len == 1) {
2970 return replace_single_character_in_place(
2971 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002972 from_s[0],
2973 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002974 maxcount);
2975 } else {
2976 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002977 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002978 }
2979 }
2980
2981 /* Otherwise use the more generic algorithms */
2982 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002983 return replace_single_character(self, from_s[0],
2984 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002985 } else {
2986 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002987 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002988 }
2989}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002990
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002991PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002992"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002993\n\
2994Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002995old replaced by new. If the optional argument count is\n\
2996given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002997
2998static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002999string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003000{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003001 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003002 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003003 const char *from_s, *to_s;
3004 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003005
Thomas Wouters477c8d52006-05-27 19:21:47 +00003006 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003007 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003008
Thomas Wouters477c8d52006-05-27 19:21:47 +00003009 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003010 from_s = PyString_AS_STRING(from);
3011 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003012 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003013 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003014 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003015 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003016 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003017 return NULL;
3018
Thomas Wouters477c8d52006-05-27 19:21:47 +00003019 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003020 to_s = PyString_AS_STRING(to);
3021 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003022 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003023 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003024 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003025 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003026 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003027 return NULL;
3028
Thomas Wouters477c8d52006-05-27 19:21:47 +00003029 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003030 from_s, from_len,
3031 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003032}
3033
Thomas Wouters477c8d52006-05-27 19:21:47 +00003034/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003035
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003036/* Matches the end (direction >= 0) or start (direction < 0) of self
3037 * against substr, using the start and end arguments. Returns
3038 * -1 on error, 0 if not found and 1 if found.
3039 */
3040Py_LOCAL(int)
3041_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3042 Py_ssize_t end, int direction)
3043{
3044 Py_ssize_t len = PyString_GET_SIZE(self);
3045 Py_ssize_t slen;
3046 const char* sub;
3047 const char* str;
3048
3049 if (PyString_Check(substr)) {
3050 sub = PyString_AS_STRING(substr);
3051 slen = PyString_GET_SIZE(substr);
3052 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003053 else if (PyUnicode_Check(substr))
3054 return PyUnicode_Tailmatch((PyObject *)self,
3055 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003056 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3057 return -1;
3058 str = PyString_AS_STRING(self);
3059
3060 string_adjust_indices(&start, &end, len);
3061
3062 if (direction < 0) {
3063 /* startswith */
3064 if (start+slen > len)
3065 return 0;
3066 } else {
3067 /* endswith */
3068 if (end-start < slen || start > len)
3069 return 0;
3070
3071 if (end-slen > start)
3072 start = end - slen;
3073 }
3074 if (end-start >= slen)
3075 return ! memcmp(str+start, sub, slen);
3076 return 0;
3077}
3078
3079
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003080PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003081"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003082\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003083Return True if S starts with the specified prefix, False otherwise.\n\
3084With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003085With optional end, stop comparing S at that position.\n\
3086prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003087
3088static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003089string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003090{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003091 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003092 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003093 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003094 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003095
Guido van Rossumc6821402000-05-08 14:08:05 +00003096 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3097 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003098 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003099 if (PyTuple_Check(subobj)) {
3100 Py_ssize_t i;
3101 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3102 result = _string_tailmatch(self,
3103 PyTuple_GET_ITEM(subobj, i),
3104 start, end, -1);
3105 if (result == -1)
3106 return NULL;
3107 else if (result) {
3108 Py_RETURN_TRUE;
3109 }
3110 }
3111 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003112 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003113 result = _string_tailmatch(self, subobj, start, end, -1);
3114 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003115 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003116 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003117 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003118}
3119
3120
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003121PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003122"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003123\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003124Return True if S ends with the specified suffix, False otherwise.\n\
3125With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003126With optional end, stop comparing S at that position.\n\
3127suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003128
3129static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003130string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003131{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003132 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003133 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003134 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003135 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003136
Guido van Rossumc6821402000-05-08 14:08:05 +00003137 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3138 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003139 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003140 if (PyTuple_Check(subobj)) {
3141 Py_ssize_t i;
3142 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3143 result = _string_tailmatch(self,
3144 PyTuple_GET_ITEM(subobj, i),
3145 start, end, +1);
3146 if (result == -1)
3147 return NULL;
3148 else if (result) {
3149 Py_RETURN_TRUE;
3150 }
3151 }
3152 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003153 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003154 result = _string_tailmatch(self, subobj, start, end, +1);
3155 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003156 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003157 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003158 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003159}
3160
3161
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003162PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003163"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003164\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003165Encodes S using the codec registered for encoding. encoding defaults\n\
3166to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003167handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003168a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3169'xmlcharrefreplace' as well as any other name registered with\n\
3170codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003171
3172static PyObject *
3173string_encode(PyStringObject *self, PyObject *args)
3174{
3175 char *encoding = NULL;
3176 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003177 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003178
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003179 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3180 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003181 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003182 if (v == NULL)
3183 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003184 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3185 PyErr_Format(PyExc_TypeError,
3186 "encoder did not return a string/unicode object "
3187 "(type=%.400s)",
3188 v->ob_type->tp_name);
3189 Py_DECREF(v);
3190 return NULL;
3191 }
3192 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003193
3194 onError:
3195 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003196}
3197
3198
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003199PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003200"S.decode([encoding[,errors]]) -> object\n\
3201\n\
3202Decodes S using the codec registered for encoding. encoding defaults\n\
3203to the default encoding. errors may be given to set a different error\n\
3204handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003205a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3206as well as any other name registerd with codecs.register_error that is\n\
3207able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003208
3209static PyObject *
3210string_decode(PyStringObject *self, PyObject *args)
3211{
3212 char *encoding = NULL;
3213 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003214 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003215
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003216 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3217 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003218 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003219 if (v == NULL)
3220 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003221 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3222 PyErr_Format(PyExc_TypeError,
3223 "decoder did not return a string/unicode object "
3224 "(type=%.400s)",
3225 v->ob_type->tp_name);
3226 Py_DECREF(v);
3227 return NULL;
3228 }
3229 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003230
3231 onError:
3232 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003233}
3234
3235
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003236PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003237"S.expandtabs([tabsize]) -> string\n\
3238\n\
3239Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003240If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003241
3242static PyObject*
3243string_expandtabs(PyStringObject *self, PyObject *args)
3244{
3245 const char *e, *p;
3246 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003247 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003248 PyObject *u;
3249 int tabsize = 8;
3250
3251 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3252 return NULL;
3253
Thomas Wouters7e474022000-07-16 12:04:32 +00003254 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003255 i = j = 0;
3256 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3257 for (p = PyString_AS_STRING(self); p < e; p++)
3258 if (*p == '\t') {
3259 if (tabsize > 0)
3260 j += tabsize - (j % tabsize);
3261 }
3262 else {
3263 j++;
3264 if (*p == '\n' || *p == '\r') {
3265 i += j;
3266 j = 0;
3267 }
3268 }
3269
3270 /* Second pass: create output string and fill it */
3271 u = PyString_FromStringAndSize(NULL, i + j);
3272 if (!u)
3273 return NULL;
3274
3275 j = 0;
3276 q = PyString_AS_STRING(u);
3277
3278 for (p = PyString_AS_STRING(self); p < e; p++)
3279 if (*p == '\t') {
3280 if (tabsize > 0) {
3281 i = tabsize - (j % tabsize);
3282 j += i;
3283 while (i--)
3284 *q++ = ' ';
3285 }
3286 }
3287 else {
3288 j++;
3289 *q++ = *p;
3290 if (*p == '\n' || *p == '\r')
3291 j = 0;
3292 }
3293
3294 return u;
3295}
3296
Thomas Wouters477c8d52006-05-27 19:21:47 +00003297Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003298pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003299{
3300 PyObject *u;
3301
3302 if (left < 0)
3303 left = 0;
3304 if (right < 0)
3305 right = 0;
3306
Tim Peters8fa5dd02001-09-12 02:18:30 +00003307 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003308 Py_INCREF(self);
3309 return (PyObject *)self;
3310 }
3311
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003312 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003313 left + PyString_GET_SIZE(self) + right);
3314 if (u) {
3315 if (left)
3316 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003317 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003318 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003319 PyString_GET_SIZE(self));
3320 if (right)
3321 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3322 fill, right);
3323 }
3324
3325 return u;
3326}
3327
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003328PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003329"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003330"\n"
3331"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003332"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003333
3334static PyObject *
3335string_ljust(PyStringObject *self, PyObject *args)
3336{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003337 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003338 char fillchar = ' ';
3339
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003340 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003341 return NULL;
3342
Tim Peters8fa5dd02001-09-12 02:18:30 +00003343 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003344 Py_INCREF(self);
3345 return (PyObject*) self;
3346 }
3347
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003348 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003349}
3350
3351
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003352PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003353"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003354"\n"
3355"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003356"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003357
3358static PyObject *
3359string_rjust(PyStringObject *self, PyObject *args)
3360{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003361 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003362 char fillchar = ' ';
3363
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003364 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003365 return NULL;
3366
Tim Peters8fa5dd02001-09-12 02:18:30 +00003367 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003368 Py_INCREF(self);
3369 return (PyObject*) self;
3370 }
3371
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003372 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003373}
3374
3375
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003376PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003377"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003378"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003379"Return S centered in a string of length width. Padding is\n"
3380"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003381
3382static PyObject *
3383string_center(PyStringObject *self, PyObject *args)
3384{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003385 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003386 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003387 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003388
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003389 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003390 return NULL;
3391
Tim Peters8fa5dd02001-09-12 02:18:30 +00003392 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003393 Py_INCREF(self);
3394 return (PyObject*) self;
3395 }
3396
3397 marg = width - PyString_GET_SIZE(self);
3398 left = marg / 2 + (marg & width & 1);
3399
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003400 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003401}
3402
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003403PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003404"S.zfill(width) -> string\n"
3405"\n"
3406"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003407"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003408
3409static PyObject *
3410string_zfill(PyStringObject *self, PyObject *args)
3411{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003412 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003413 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003414 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003415 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003416
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003417 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003418 return NULL;
3419
3420 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003421 if (PyString_CheckExact(self)) {
3422 Py_INCREF(self);
3423 return (PyObject*) self;
3424 }
3425 else
3426 return PyString_FromStringAndSize(
3427 PyString_AS_STRING(self),
3428 PyString_GET_SIZE(self)
3429 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003430 }
3431
3432 fill = width - PyString_GET_SIZE(self);
3433
3434 s = pad(self, fill, 0, '0');
3435
3436 if (s == NULL)
3437 return NULL;
3438
3439 p = PyString_AS_STRING(s);
3440 if (p[fill] == '+' || p[fill] == '-') {
3441 /* move sign to beginning of string */
3442 p[0] = p[fill];
3443 p[fill] = '0';
3444 }
3445
3446 return (PyObject*) s;
3447}
3448
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003449PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003450"S.isspace() -> bool\n\
3451\n\
3452Return True if all characters in S are whitespace\n\
3453and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003454
3455static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003456string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003457{
Fred Drakeba096332000-07-09 07:04:36 +00003458 register const unsigned char *p
3459 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003460 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003461
Guido van Rossum4c08d552000-03-10 22:55:18 +00003462 /* Shortcut for single character strings */
3463 if (PyString_GET_SIZE(self) == 1 &&
3464 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003465 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003466
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003467 /* Special case for empty strings */
3468 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003469 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003470
Guido van Rossum4c08d552000-03-10 22:55:18 +00003471 e = p + PyString_GET_SIZE(self);
3472 for (; p < e; p++) {
3473 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003474 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003475 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003476 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003477}
3478
3479
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003480PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003481"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003482\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003483Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003484and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003485
3486static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003487string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003488{
Fred Drakeba096332000-07-09 07:04:36 +00003489 register const unsigned char *p
3490 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003491 register const unsigned char *e;
3492
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003493 /* Shortcut for single character strings */
3494 if (PyString_GET_SIZE(self) == 1 &&
3495 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003496 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003497
3498 /* Special case for empty strings */
3499 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003500 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003501
3502 e = p + PyString_GET_SIZE(self);
3503 for (; p < e; p++) {
3504 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003505 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003506 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003507 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003508}
3509
3510
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003511PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003512"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003513\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003514Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003515and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003516
3517static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003518string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003519{
Fred Drakeba096332000-07-09 07:04:36 +00003520 register const unsigned char *p
3521 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003522 register const unsigned char *e;
3523
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003524 /* Shortcut for single character strings */
3525 if (PyString_GET_SIZE(self) == 1 &&
3526 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003527 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003528
3529 /* Special case for empty strings */
3530 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003531 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003532
3533 e = p + PyString_GET_SIZE(self);
3534 for (; p < e; p++) {
3535 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003536 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003537 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003538 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003539}
3540
3541
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003542PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003543"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003544\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003545Return True if all characters in S are digits\n\
3546and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003547
3548static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003549string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003550{
Fred Drakeba096332000-07-09 07:04:36 +00003551 register const unsigned char *p
3552 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003553 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003554
Guido van Rossum4c08d552000-03-10 22:55:18 +00003555 /* Shortcut for single character strings */
3556 if (PyString_GET_SIZE(self) == 1 &&
3557 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003558 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003559
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003560 /* Special case for empty strings */
3561 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003562 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003563
Guido van Rossum4c08d552000-03-10 22:55:18 +00003564 e = p + PyString_GET_SIZE(self);
3565 for (; p < e; p++) {
3566 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003567 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003568 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003569 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003570}
3571
3572
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003573PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003574"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003575\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003576Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003577at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003578
3579static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003580string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003581{
Fred Drakeba096332000-07-09 07:04:36 +00003582 register const unsigned char *p
3583 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003584 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003585 int cased;
3586
Guido van Rossum4c08d552000-03-10 22:55:18 +00003587 /* Shortcut for single character strings */
3588 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003589 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003590
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003591 /* Special case for empty strings */
3592 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003593 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003594
Guido van Rossum4c08d552000-03-10 22:55:18 +00003595 e = p + PyString_GET_SIZE(self);
3596 cased = 0;
3597 for (; p < e; p++) {
3598 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003599 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003600 else if (!cased && islower(*p))
3601 cased = 1;
3602 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003603 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003604}
3605
3606
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003607PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003608"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003609\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003610Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003611at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612
3613static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003614string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003615{
Fred Drakeba096332000-07-09 07:04:36 +00003616 register const unsigned char *p
3617 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003618 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003619 int cased;
3620
Guido van Rossum4c08d552000-03-10 22:55:18 +00003621 /* Shortcut for single character strings */
3622 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003623 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003624
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003625 /* Special case for empty strings */
3626 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003627 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003628
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629 e = p + PyString_GET_SIZE(self);
3630 cased = 0;
3631 for (; p < e; p++) {
3632 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003633 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634 else if (!cased && isupper(*p))
3635 cased = 1;
3636 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638}
3639
3640
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003641PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003642"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003644Return True if S is a titlecased string and there is at least one\n\
3645character in S, i.e. uppercase characters may only follow uncased\n\
3646characters and lowercase characters only cased ones. Return False\n\
3647otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003648
3649static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003650string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651{
Fred Drakeba096332000-07-09 07:04:36 +00003652 register const unsigned char *p
3653 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003654 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655 int cased, previous_is_cased;
3656
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657 /* Shortcut for single character strings */
3658 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003659 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003661 /* Special case for empty strings */
3662 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003663 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003664
Guido van Rossum4c08d552000-03-10 22:55:18 +00003665 e = p + PyString_GET_SIZE(self);
3666 cased = 0;
3667 previous_is_cased = 0;
3668 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003669 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670
3671 if (isupper(ch)) {
3672 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003673 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003674 previous_is_cased = 1;
3675 cased = 1;
3676 }
3677 else if (islower(ch)) {
3678 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003679 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680 previous_is_cased = 1;
3681 cased = 1;
3682 }
3683 else
3684 previous_is_cased = 0;
3685 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003686 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687}
3688
3689
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003690PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003691"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692\n\
3693Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003694Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003695is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697static PyObject*
3698string_splitlines(PyStringObject *self, PyObject *args)
3699{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003700 register Py_ssize_t i;
3701 register Py_ssize_t j;
3702 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003703 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704 PyObject *list;
3705 PyObject *str;
3706 char *data;
3707
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003708 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709 return NULL;
3710
3711 data = PyString_AS_STRING(self);
3712 len = PyString_GET_SIZE(self);
3713
Thomas Wouters477c8d52006-05-27 19:21:47 +00003714 /* This does not use the preallocated list because splitlines is
3715 usually run with hundreds of newlines. The overhead of
3716 switching between PyList_SET_ITEM and append causes about a
3717 2-3% slowdown for that common case. A smarter implementation
3718 could move the if check out, so the SET_ITEMs are done first
3719 and the appends only done when the prealloc buffer is full.
3720 That's too much work for little gain.*/
3721
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722 list = PyList_New(0);
3723 if (!list)
3724 goto onError;
3725
3726 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003727 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003728
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729 /* Find a line and append it */
3730 while (i < len && data[i] != '\n' && data[i] != '\r')
3731 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003732
3733 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003734 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735 if (i < len) {
3736 if (data[i] == '\r' && i + 1 < len &&
3737 data[i+1] == '\n')
3738 i += 2;
3739 else
3740 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003741 if (keepends)
3742 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003744 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003745 j = i;
3746 }
3747 if (j < len) {
3748 SPLIT_APPEND(data, j, len);
3749 }
3750
3751 return list;
3752
3753 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003754 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755 return NULL;
3756}
3757
3758#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003759#undef SPLIT_ADD
3760#undef MAX_PREALLOC
3761#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003762
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003763static PyObject *
3764string_getnewargs(PyStringObject *v)
3765{
3766 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3767}
3768
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003769
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003770static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003771string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772 /* Counterparts of the obsolete stropmodule functions; except
3773 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003774 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3775 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003776 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003777 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3778 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003779 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3780 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3781 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3782 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3783 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3784 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3785 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003786 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3787 capitalize__doc__},
3788 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3789 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3790 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003791 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003792 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3793 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3794 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3795 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3796 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3797 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3798 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003799 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3800 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003801 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3802 startswith__doc__},
3803 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3804 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3805 swapcase__doc__},
3806 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3807 translate__doc__},
3808 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3809 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3810 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3811 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3812 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3813 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3814 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3815 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3816 expandtabs__doc__},
3817 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3818 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003819 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003820 {NULL, NULL} /* sentinel */
3821};
3822
Jeremy Hylton938ace62002-07-17 16:30:39 +00003823static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003824str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3825
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003826static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003827string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003828{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003829 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003830 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003831
Guido van Rossumae960af2001-08-30 03:11:59 +00003832 if (type != &PyString_Type)
3833 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003834 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3835 return NULL;
3836 if (x == NULL)
3837 return PyString_FromString("");
3838 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003839}
3840
Guido van Rossumae960af2001-08-30 03:11:59 +00003841static PyObject *
3842str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3843{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003844 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003845 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003846
3847 assert(PyType_IsSubtype(type, &PyString_Type));
3848 tmp = string_new(&PyString_Type, args, kwds);
3849 if (tmp == NULL)
3850 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003851 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003852 n = PyString_GET_SIZE(tmp);
3853 pnew = type->tp_alloc(type, n);
3854 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003855 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003856 ((PyStringObject *)pnew)->ob_shash =
3857 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003858 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003859 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003860 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003861 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003862}
3863
Guido van Rossumcacfc072002-05-24 19:01:59 +00003864static PyObject *
3865basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3866{
3867 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003868 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003869 return NULL;
3870}
3871
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003872static PyObject *
3873string_mod(PyObject *v, PyObject *w)
3874{
3875 if (!PyString_Check(v)) {
3876 Py_INCREF(Py_NotImplemented);
3877 return Py_NotImplemented;
3878 }
3879 return PyString_Format(v, w);
3880}
3881
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003882PyDoc_STRVAR(basestring_doc,
3883"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003884
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003885static PyNumberMethods string_as_number = {
3886 0, /*nb_add*/
3887 0, /*nb_subtract*/
3888 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003889 string_mod, /*nb_remainder*/
3890};
3891
3892
Guido van Rossumcacfc072002-05-24 19:01:59 +00003893PyTypeObject PyBaseString_Type = {
3894 PyObject_HEAD_INIT(&PyType_Type)
3895 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003896 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003897 0,
3898 0,
3899 0, /* tp_dealloc */
3900 0, /* tp_print */
3901 0, /* tp_getattr */
3902 0, /* tp_setattr */
3903 0, /* tp_compare */
3904 0, /* tp_repr */
3905 0, /* tp_as_number */
3906 0, /* tp_as_sequence */
3907 0, /* tp_as_mapping */
3908 0, /* tp_hash */
3909 0, /* tp_call */
3910 0, /* tp_str */
3911 0, /* tp_getattro */
3912 0, /* tp_setattro */
3913 0, /* tp_as_buffer */
3914 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3915 basestring_doc, /* tp_doc */
3916 0, /* tp_traverse */
3917 0, /* tp_clear */
3918 0, /* tp_richcompare */
3919 0, /* tp_weaklistoffset */
3920 0, /* tp_iter */
3921 0, /* tp_iternext */
3922 0, /* tp_methods */
3923 0, /* tp_members */
3924 0, /* tp_getset */
3925 &PyBaseObject_Type, /* tp_base */
3926 0, /* tp_dict */
3927 0, /* tp_descr_get */
3928 0, /* tp_descr_set */
3929 0, /* tp_dictoffset */
3930 0, /* tp_init */
3931 0, /* tp_alloc */
3932 basestring_new, /* tp_new */
3933 0, /* tp_free */
3934};
3935
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003936PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003937"str(object) -> string\n\
3938\n\
3939Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003940If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003941
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003942static PyObject *str_iter(PyObject *seq);
3943
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003944PyTypeObject PyString_Type = {
3945 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003946 0,
Guido van Rossum8d30cc02007-05-03 17:49:24 +00003947 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003948 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003949 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003950 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003951 (printfunc)string_print, /* tp_print */
3952 0, /* tp_getattr */
3953 0, /* tp_setattr */
3954 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003955 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003956 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003957 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003958 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003959 (hashfunc)string_hash, /* tp_hash */
3960 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003961 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003962 PyObject_GenericGetAttr, /* tp_getattro */
3963 0, /* tp_setattro */
3964 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003965 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3966 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003967 string_doc, /* tp_doc */
3968 0, /* tp_traverse */
3969 0, /* tp_clear */
3970 (richcmpfunc)string_richcompare, /* tp_richcompare */
3971 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003972 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003973 0, /* tp_iternext */
3974 string_methods, /* tp_methods */
3975 0, /* tp_members */
3976 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003977 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003978 0, /* tp_dict */
3979 0, /* tp_descr_get */
3980 0, /* tp_descr_set */
3981 0, /* tp_dictoffset */
3982 0, /* tp_init */
3983 0, /* tp_alloc */
3984 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003985 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003986};
3987
3988void
Fred Drakeba096332000-07-09 07:04:36 +00003989PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003990{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003991 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003992 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003993 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003994 if (w == NULL || !PyString_Check(*pv)) {
3995 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003996 *pv = NULL;
3997 return;
3998 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003999 v = string_concat((PyStringObject *) *pv, w);
4000 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004001 *pv = v;
4002}
4003
Guido van Rossum013142a1994-08-30 08:19:36 +00004004void
Fred Drakeba096332000-07-09 07:04:36 +00004005PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004006{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004007 PyString_Concat(pv, w);
4008 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004009}
4010
4011
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004012/* The following function breaks the notion that strings are immutable:
4013 it changes the size of a string. We get away with this only if there
4014 is only one module referencing the object. You can also think of it
4015 as creating a new string object and destroying the old one, only
4016 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004017 already be known to some other part of the code...
4018 Note that if there's not enough memory to resize the string, the original
4019 string object at *pv is deallocated, *pv is set to NULL, an "out of
4020 memory" exception is set, and -1 is returned. Else (on success) 0 is
4021 returned, and the value in *pv may or may not be the same as on input.
4022 As always, an extra byte is allocated for a trailing \0 byte (newsize
4023 does *not* include that), and a trailing \0 byte is stored.
4024*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004025
4026int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004027_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004028{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004029 register PyObject *v;
4030 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004031 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004032 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4033 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004034 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004035 Py_DECREF(v);
4036 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004037 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004038 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004039 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004040 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004041 _Py_ForgetReference(v);
4042 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004043 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004044 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004045 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004046 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004047 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004048 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004049 _Py_NewReference(*pv);
4050 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004051 sv->ob_size = newsize;
4052 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004053 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004054 return 0;
4055}
Guido van Rossume5372401993-03-16 12:15:04 +00004056
4057/* Helpers for formatstring */
4058
Thomas Wouters477c8d52006-05-27 19:21:47 +00004059Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004060getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004061{
Thomas Wouters977485d2006-02-16 15:59:12 +00004062 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004063 if (argidx < arglen) {
4064 (*p_argidx)++;
4065 if (arglen < 0)
4066 return args;
4067 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004068 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004069 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004070 PyErr_SetString(PyExc_TypeError,
4071 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004072 return NULL;
4073}
4074
/* Bit flags recording which format flag characters were given. */
#define F_LJUST (1<<0)      /* '-' */
#define F_SIGN  (1<<1)      /* '+' */
#define F_BLANK (1<<2)      /* ' ' */
#define F_ALT   (1<<3)      /* '#' */
#define F_ZERO  (1<<4)      /* '0' */
4087
Thomas Wouters477c8d52006-05-27 19:21:47 +00004088Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004089formatfloat(char *buf, size_t buflen, int flags,
4090 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004091{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004092 /* fmt = '%#.' + `prec` + `type`
4093 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004094 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004095 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004096 x = PyFloat_AsDouble(v);
4097 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004098 PyErr_Format(PyExc_TypeError, "float argument required, "
4099 "not %.200s", v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004100 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004101 }
Guido van Rossume5372401993-03-16 12:15:04 +00004102 if (prec < 0)
4103 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004104 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4105 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004106 /* Worst case length calc to ensure no buffer overrun:
4107
4108 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004109 fmt = %#.<prec>g
4110 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004111 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004112 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004113
4114 'f' formats:
4115 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4116 len = 1 + 50 + 1 + prec = 52 + prec
4117
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004118 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004119 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004120
4121 */
4122 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4123 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004124 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004125 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004126 return -1;
4127 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004128 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4129 (flags&F_ALT) ? "#" : "",
4130 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004131 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004132 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004133}
4134
Tim Peters38fd5b62000-09-21 05:43:11 +00004135/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4136 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4137 * Python's regular ints.
4138 * Return value: a new PyString*, or NULL if error.
4139 * . *pbuf is set to point into it,
4140 * *plen set to the # of chars following that.
4141 * Caller must decref it when done using pbuf.
4142 * The string starting at *pbuf is of the form
4143 * "-"? ("0x" | "0X")? digit+
4144 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004145 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004146 * There will be at least prec digits, zero-filled on the left if
4147 * necessary to get that many.
4148 * val object to be converted
4149 * flags bitmask of format flags; only F_ALT is looked at
4150 * prec minimum number of digits; 0-fill on left if needed
4151 * type a character in [duoxX]; u acts the same as d
4152 *
4153 * CAUTION: o, x and X conversions on regular ints can never
4154 * produce a '-' sign, but can for Python's unbounded ints.
4155 */
4156PyObject*
4157_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4158 char **pbuf, int *plen)
4159{
4160 PyObject *result = NULL;
4161 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004162 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004163 int sign; /* 1 if '-', else 0 */
4164 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004165 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004166 int numdigits; /* len == numnondigits + numdigits */
4167 int numnondigits = 0;
4168
Guido van Rossumddefaf32007-01-14 03:31:43 +00004169 /* Avoid exceeding SSIZE_T_MAX */
4170 if (prec > PY_SSIZE_T_MAX-3) {
4171 PyErr_SetString(PyExc_OverflowError,
4172 "precision too large");
4173 return NULL;
4174 }
4175
4176
Tim Peters38fd5b62000-09-21 05:43:11 +00004177 switch (type) {
4178 case 'd':
4179 case 'u':
4180 result = val->ob_type->tp_str(val);
4181 break;
4182 case 'o':
4183 result = val->ob_type->tp_as_number->nb_oct(val);
4184 break;
4185 case 'x':
4186 case 'X':
4187 numnondigits = 2;
4188 result = val->ob_type->tp_as_number->nb_hex(val);
4189 break;
4190 default:
4191 assert(!"'type' not in [duoxX]");
4192 }
4193 if (!result)
4194 return NULL;
4195
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004196 buf = PyString_AsString(result);
4197 if (!buf) {
4198 Py_DECREF(result);
4199 return NULL;
4200 }
4201
Tim Peters38fd5b62000-09-21 05:43:11 +00004202 /* To modify the string in-place, there can only be one reference. */
4203 if (result->ob_refcnt != 1) {
4204 PyErr_BadInternalCall();
4205 return NULL;
4206 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004207 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004208 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004209 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4210 return NULL;
4211 }
4212 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004213 if (buf[len-1] == 'L') {
4214 --len;
4215 buf[len] = '\0';
4216 }
4217 sign = buf[0] == '-';
4218 numnondigits += sign;
4219 numdigits = len - numnondigits;
4220 assert(numdigits > 0);
4221
Tim Petersfff53252001-04-12 18:38:48 +00004222 /* Get rid of base marker unless F_ALT */
4223 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004224 /* Need to skip 0x, 0X or 0. */
4225 int skipped = 0;
4226 switch (type) {
4227 case 'o':
4228 assert(buf[sign] == '0');
4229 /* If 0 is only digit, leave it alone. */
4230 if (numdigits > 1) {
4231 skipped = 1;
4232 --numdigits;
4233 }
4234 break;
4235 case 'x':
4236 case 'X':
4237 assert(buf[sign] == '0');
4238 assert(buf[sign + 1] == 'x');
4239 skipped = 2;
4240 numnondigits -= 2;
4241 break;
4242 }
4243 if (skipped) {
4244 buf += skipped;
4245 len -= skipped;
4246 if (sign)
4247 buf[0] = '-';
4248 }
4249 assert(len == numnondigits + numdigits);
4250 assert(numdigits > 0);
4251 }
4252
4253 /* Fill with leading zeroes to meet minimum width. */
4254 if (prec > numdigits) {
4255 PyObject *r1 = PyString_FromStringAndSize(NULL,
4256 numnondigits + prec);
4257 char *b1;
4258 if (!r1) {
4259 Py_DECREF(result);
4260 return NULL;
4261 }
4262 b1 = PyString_AS_STRING(r1);
4263 for (i = 0; i < numnondigits; ++i)
4264 *b1++ = *buf++;
4265 for (i = 0; i < prec - numdigits; i++)
4266 *b1++ = '0';
4267 for (i = 0; i < numdigits; i++)
4268 *b1++ = *buf++;
4269 *b1 = '\0';
4270 Py_DECREF(result);
4271 result = r1;
4272 buf = PyString_AS_STRING(result);
4273 len = numnondigits + prec;
4274 }
4275
4276 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004277 if (type == 'X') {
4278 /* Need to convert all lower case letters to upper case.
4279 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004280 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004281 if (buf[i] >= 'a' && buf[i] <= 'x')
4282 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004283 }
4284 *pbuf = buf;
4285 *plen = len;
4286 return result;
4287}
4288
Thomas Wouters477c8d52006-05-27 19:21:47 +00004289Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004290formatint(char *buf, size_t buflen, int flags,
4291 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004292{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004293 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004294 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4295 + 1 + 1 = 24 */
4296 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004297 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004298 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004299
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004300 x = PyInt_AsLong(v);
4301 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004302 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4303 v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004304 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004305 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004306 if (x < 0 && type == 'u') {
4307 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004308 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004309 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4310 sign = "-";
4311 else
4312 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004313 if (prec < 0)
4314 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004315
4316 if ((flags & F_ALT) &&
4317 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004318 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004319 * of issues that cause pain:
4320 * - when 0 is being converted, the C standard leaves off
4321 * the '0x' or '0X', which is inconsistent with other
4322 * %#x/%#X conversions and inconsistent with Python's
4323 * hex() function
4324 * - there are platforms that violate the standard and
4325 * convert 0 with the '0x' or '0X'
4326 * (Metrowerks, Compaq Tru64)
4327 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004328 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004329 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004330 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004331 * We can achieve the desired consistency by inserting our
4332 * own '0x' or '0X' prefix, and substituting %x/%X in place
4333 * of %#x/%#X.
4334 *
4335 * Note that this is the same approach as used in
4336 * formatint() in unicodeobject.c
4337 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004338 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4339 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004340 }
4341 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004342 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4343 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004344 prec, type);
4345 }
4346
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004347 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4348 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004349 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004350 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004351 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004352 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004353 return -1;
4354 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004355 if (sign[0])
4356 PyOS_snprintf(buf, buflen, fmt, -x);
4357 else
4358 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004359 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004360}
4361
Thomas Wouters477c8d52006-05-27 19:21:47 +00004362Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004363formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004364{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004365 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004366 if (PyString_Check(v)) {
4367 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004368 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004369 }
4370 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004371 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004372 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004373 }
4374 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004375 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004376}
4377
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004378/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4379
4380 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4381 chars are formatted. XXX This is a magic number. Each formatting
4382 routine does bounds checking to ensure no overflow, but a better
4383 solution may be to malloc a buffer of appropriate size for each
4384 format. For now, the current solution is sufficient.
4385*/
4386#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004387
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004388PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004389PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004390{
4391 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004392 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004393 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004394 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004395 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004396 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004397 PyObject *dict = NULL;
4398 if (format == NULL || !PyString_Check(format) || args == NULL) {
4399 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004400 return NULL;
4401 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004402 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004403 fmt = PyString_AS_STRING(format);
4404 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004405 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004406 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004407 if (result == NULL)
4408 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004409 res = PyString_AsString(result);
4410 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004411 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004412 argidx = 0;
4413 }
4414 else {
4415 arglen = -1;
4416 argidx = -2;
4417 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004418 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4419 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004420 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004421 while (--fmtcnt >= 0) {
4422 if (*fmt != '%') {
4423 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004424 rescnt = fmtcnt + 100;
4425 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004426 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004427 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004428 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004429 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004430 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004431 }
4432 *res++ = *fmt++;
4433 }
4434 else {
4435 /* Got a format specifier */
4436 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004437 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004438 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004439 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004440 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004441 PyObject *v = NULL;
4442 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004443 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004444 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004445 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004446 char formatbuf[FORMATBUFLEN];
4447 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00004448 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004449 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004450
Guido van Rossumda9c2711996-12-05 21:58:58 +00004451 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004452 if (*fmt == '(') {
4453 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004454 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004455 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004456 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004457
4458 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004459 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004460 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004461 goto error;
4462 }
4463 ++fmt;
4464 --fmtcnt;
4465 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004466 /* Skip over balanced parentheses */
4467 while (pcount > 0 && --fmtcnt >= 0) {
4468 if (*fmt == ')')
4469 --pcount;
4470 else if (*fmt == '(')
4471 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004472 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004473 }
4474 keylen = fmt - keystart - 1;
4475 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004476 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004477 "incomplete format key");
4478 goto error;
4479 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004480 key = PyString_FromStringAndSize(keystart,
4481 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004482 if (key == NULL)
4483 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004484 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004485 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004486 args_owned = 0;
4487 }
4488 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004489 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004490 if (args == NULL) {
4491 goto error;
4492 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004493 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004494 arglen = -1;
4495 argidx = -2;
4496 }
Guido van Rossume5372401993-03-16 12:15:04 +00004497 while (--fmtcnt >= 0) {
4498 switch (c = *fmt++) {
4499 case '-': flags |= F_LJUST; continue;
4500 case '+': flags |= F_SIGN; continue;
4501 case ' ': flags |= F_BLANK; continue;
4502 case '#': flags |= F_ALT; continue;
4503 case '0': flags |= F_ZERO; continue;
4504 }
4505 break;
4506 }
4507 if (c == '*') {
4508 v = getnextarg(args, arglen, &argidx);
4509 if (v == NULL)
4510 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004511 if (!PyInt_Check(v)) {
4512 PyErr_SetString(PyExc_TypeError,
4513 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004514 goto error;
4515 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004516 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004517 if (width == -1 && PyErr_Occurred())
4518 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004519 if (width < 0) {
4520 flags |= F_LJUST;
4521 width = -width;
4522 }
Guido van Rossume5372401993-03-16 12:15:04 +00004523 if (--fmtcnt >= 0)
4524 c = *fmt++;
4525 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004526 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004527 width = c - '0';
4528 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004529 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004530 if (!isdigit(c))
4531 break;
4532 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004533 PyErr_SetString(
4534 PyExc_ValueError,
4535 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004536 goto error;
4537 }
4538 width = width*10 + (c - '0');
4539 }
4540 }
4541 if (c == '.') {
4542 prec = 0;
4543 if (--fmtcnt >= 0)
4544 c = *fmt++;
4545 if (c == '*') {
4546 v = getnextarg(args, arglen, &argidx);
4547 if (v == NULL)
4548 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004549 if (!PyInt_Check(v)) {
4550 PyErr_SetString(
4551 PyExc_TypeError,
4552 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004553 goto error;
4554 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004555 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004556 if (prec == -1 && PyErr_Occurred())
4557 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004558 if (prec < 0)
4559 prec = 0;
4560 if (--fmtcnt >= 0)
4561 c = *fmt++;
4562 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004563 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004564 prec = c - '0';
4565 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004566 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004567 if (!isdigit(c))
4568 break;
4569 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004570 PyErr_SetString(
4571 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004572 "prec too big");
4573 goto error;
4574 }
4575 prec = prec*10 + (c - '0');
4576 }
4577 }
4578 } /* prec */
4579 if (fmtcnt >= 0) {
4580 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004581 if (--fmtcnt >= 0)
4582 c = *fmt++;
4583 }
4584 }
4585 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004586 PyErr_SetString(PyExc_ValueError,
4587 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004588 goto error;
4589 }
4590 if (c != '%') {
4591 v = getnextarg(args, arglen, &argidx);
4592 if (v == NULL)
4593 goto error;
4594 }
4595 sign = 0;
4596 fill = ' ';
4597 switch (c) {
4598 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004599 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004600 len = 1;
4601 break;
4602 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00004603 if (PyUnicode_Check(v)) {
4604 fmt = fmt_start;
4605 argidx = argidx_start;
4606 goto unicode;
4607 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00004608 temp = _PyObject_Str(v);
4609 if (temp != NULL && PyUnicode_Check(temp)) {
4610 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004611 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004612 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004613 goto unicode;
4614 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004615 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004616 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004617 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004618 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004619 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004620 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004621 if (!PyString_Check(temp)) {
4622 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004623 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004624 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004625 goto error;
4626 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004627 pbuf = PyString_AS_STRING(temp);
4628 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004629 if (prec >= 0 && len > prec)
4630 len = prec;
4631 break;
4632 case 'i':
4633 case 'd':
4634 case 'u':
4635 case 'o':
4636 case 'x':
4637 case 'X':
4638 if (c == 'i')
4639 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004640 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004641 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004642 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004643 prec, c, &pbuf, &ilen);
4644 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004645 if (!temp)
4646 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004647 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004648 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004649 else {
4650 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004651 len = formatint(pbuf,
4652 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004653 flags, prec, c, v);
4654 if (len < 0)
4655 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004656 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004657 }
4658 if (flags & F_ZERO)
4659 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004660 break;
4661 case 'e':
4662 case 'E':
4663 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004664 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004665 case 'g':
4666 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004667 if (c == 'F')
4668 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004669 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004670 len = formatfloat(pbuf, sizeof(formatbuf),
4671 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004672 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004673 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004674 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004675 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004676 fill = '0';
4677 break;
4678 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004679 if (PyUnicode_Check(v)) {
4680 fmt = fmt_start;
4681 argidx = argidx_start;
4682 goto unicode;
4683 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004684 pbuf = formatbuf;
4685 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004686 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004687 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004688 break;
4689 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004690 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004691 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004692 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004693 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004694 (Py_ssize_t)(fmt - 1 -
4695 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004696 goto error;
4697 }
4698 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004699 if (*pbuf == '-' || *pbuf == '+') {
4700 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004701 len--;
4702 }
4703 else if (flags & F_SIGN)
4704 sign = '+';
4705 else if (flags & F_BLANK)
4706 sign = ' ';
4707 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004708 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004709 }
4710 if (width < len)
4711 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004712 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004713 reslen -= rescnt;
4714 rescnt = width + fmtcnt + 100;
4715 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004716 if (reslen < 0) {
4717 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004718 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004719 return PyErr_NoMemory();
4720 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004721 if (_PyString_Resize(&result, reslen) < 0) {
4722 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004723 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004724 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004725 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004726 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004727 }
4728 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004729 if (fill != ' ')
4730 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004731 rescnt--;
4732 if (width > len)
4733 width--;
4734 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004735 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4736 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004737 assert(pbuf[1] == c);
4738 if (fill != ' ') {
4739 *res++ = *pbuf++;
4740 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004741 }
Tim Petersfff53252001-04-12 18:38:48 +00004742 rescnt -= 2;
4743 width -= 2;
4744 if (width < 0)
4745 width = 0;
4746 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004747 }
4748 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004749 do {
4750 --rescnt;
4751 *res++ = fill;
4752 } while (--width > len);
4753 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004754 if (fill == ' ') {
4755 if (sign)
4756 *res++ = sign;
4757 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004758 (c == 'x' || c == 'X')) {
4759 assert(pbuf[0] == '0');
4760 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004761 *res++ = *pbuf++;
4762 *res++ = *pbuf++;
4763 }
4764 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004765 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004766 res += len;
4767 rescnt -= len;
4768 while (--width >= len) {
4769 --rescnt;
4770 *res++ = ' ';
4771 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004772 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004773 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004774 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004775 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004776 goto error;
4777 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004778 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004779 } /* '%' */
4780 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004781 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004782 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004783 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004784 goto error;
4785 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004786 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004787 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004788 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004789 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004790 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004791
4792 unicode:
4793 if (args_owned) {
4794 Py_DECREF(args);
4795 args_owned = 0;
4796 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004797 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004798 if (PyTuple_Check(orig_args) && argidx > 0) {
4799 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004800 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004801 v = PyTuple_New(n);
4802 if (v == NULL)
4803 goto error;
4804 while (--n >= 0) {
4805 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4806 Py_INCREF(w);
4807 PyTuple_SET_ITEM(v, n, w);
4808 }
4809 args = v;
4810 } else {
4811 Py_INCREF(orig_args);
4812 args = orig_args;
4813 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004814 args_owned = 1;
4815 /* Take what we have of the result and let the Unicode formatting
4816 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004817 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004818 if (_PyString_Resize(&result, rescnt))
4819 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004820 fmtcnt = PyString_GET_SIZE(format) - \
4821 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004822 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4823 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004824 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004825 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004826 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004827 if (v == NULL)
4828 goto error;
4829 /* Paste what we have (result) to what the Unicode formatting
4830 function returned (v) and return the result (or error) */
4831 w = PyUnicode_Concat(result, v);
4832 Py_DECREF(result);
4833 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004834 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004835 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004836
Guido van Rossume5372401993-03-16 12:15:04 +00004837 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004838 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004839 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004840 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004841 }
Guido van Rossume5372401993-03-16 12:15:04 +00004842 return NULL;
4843}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004844
Guido van Rossum2a61e741997-01-18 07:55:05 +00004845void
Fred Drakeba096332000-07-09 07:04:36 +00004846PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004847{
4848 register PyStringObject *s = (PyStringObject *)(*p);
4849 PyObject *t;
4850 if (s == NULL || !PyString_Check(s))
4851 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004852 /* If it's a string subclass, we don't really know what putting
4853 it in the interned dict might do. */
4854 if (!PyString_CheckExact(s))
4855 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004856 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004857 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004858 if (interned == NULL) {
4859 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004860 if (interned == NULL) {
4861 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004862 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004863 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004864 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004865 t = PyDict_GetItem(interned, (PyObject *)s);
4866 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004867 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004868 Py_DECREF(*p);
4869 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004870 return;
4871 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004872
Armin Rigo79f7ad22004-08-07 19:27:39 +00004873 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004874 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004875 return;
4876 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004877 /* The two references in interned are not counted by refcnt.
4878 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004879 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004880 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004881}
4882
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004883void
4884PyString_InternImmortal(PyObject **p)
4885{
4886 PyString_InternInPlace(p);
4887 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4888 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4889 Py_INCREF(*p);
4890 }
4891}
4892
Guido van Rossum2a61e741997-01-18 07:55:05 +00004893
4894PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004895PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004896{
4897 PyObject *s = PyString_FromString(cp);
4898 if (s == NULL)
4899 return NULL;
4900 PyString_InternInPlace(&s);
4901 return s;
4902}
4903
Guido van Rossum8cf04761997-08-02 02:57:45 +00004904void
Fred Drakeba096332000-07-09 07:04:36 +00004905PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004906{
4907 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004908 for (i = 0; i < UCHAR_MAX + 1; i++) {
4909 Py_XDECREF(characters[i]);
4910 characters[i] = NULL;
4911 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004912 Py_XDECREF(nullstring);
4913 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004914}
Barry Warsawa903ad982001-02-23 16:40:48 +00004915
Barry Warsawa903ad982001-02-23 16:40:48 +00004916void _Py_ReleaseInternedStrings(void)
4917{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004918 PyObject *keys;
4919 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004920 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004921 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004922
4923 if (interned == NULL || !PyDict_Check(interned))
4924 return;
4925 keys = PyDict_Keys(interned);
4926 if (keys == NULL || !PyList_Check(keys)) {
4927 PyErr_Clear();
4928 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004929 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004930
4931 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4932 detector, interned strings are not forcibly deallocated; rather, we
4933 give them their stolen references back, and then clear and DECREF
4934 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004935
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004936 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004937 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4938 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004939 for (i = 0; i < n; i++) {
4940 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4941 switch (s->ob_sstate) {
4942 case SSTATE_NOT_INTERNED:
4943 /* XXX Shouldn't happen */
4944 break;
4945 case SSTATE_INTERNED_IMMORTAL:
4946 s->ob_refcnt += 1;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004947 immortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004948 break;
4949 case SSTATE_INTERNED_MORTAL:
4950 s->ob_refcnt += 2;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004951 mortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004952 break;
4953 default:
4954 Py_FatalError("Inconsistent interned string state.");
4955 }
4956 s->ob_sstate = SSTATE_NOT_INTERNED;
4957 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004958 fprintf(stderr, "total size of all interned strings: "
4959 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4960 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004961 Py_DECREF(keys);
4962 PyDict_Clear(interned);
4963 Py_DECREF(interned);
4964 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004965}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004966
4967
4968/*********************** Str Iterator ****************************/
4969
4970typedef struct {
4971 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00004972 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004973 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4974} striterobject;
4975
4976static void
4977striter_dealloc(striterobject *it)
4978{
4979 _PyObject_GC_UNTRACK(it);
4980 Py_XDECREF(it->it_seq);
4981 PyObject_GC_Del(it);
4982}
4983
4984static int
4985striter_traverse(striterobject *it, visitproc visit, void *arg)
4986{
4987 Py_VISIT(it->it_seq);
4988 return 0;
4989}
4990
4991static PyObject *
4992striter_next(striterobject *it)
4993{
4994 PyStringObject *seq;
4995 PyObject *item;
4996
4997 assert(it != NULL);
4998 seq = it->it_seq;
4999 if (seq == NULL)
5000 return NULL;
5001 assert(PyString_Check(seq));
5002
5003 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005004 item = PyString_FromStringAndSize(
5005 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005006 if (item != NULL)
5007 ++it->it_index;
5008 return item;
5009 }
5010
5011 Py_DECREF(seq);
5012 it->it_seq = NULL;
5013 return NULL;
5014}
5015
5016static PyObject *
5017striter_len(striterobject *it)
5018{
5019 Py_ssize_t len = 0;
5020 if (it->it_seq)
5021 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
5022 return PyInt_FromSsize_t(len);
5023}
5024
Guido van Rossum49d6b072006-08-17 21:11:47 +00005025PyDoc_STRVAR(length_hint_doc,
5026 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005027
5028static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005029 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
5030 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005031 {NULL, NULL} /* sentinel */
5032};
5033
5034PyTypeObject PyStringIter_Type = {
5035 PyObject_HEAD_INIT(&PyType_Type)
5036 0, /* ob_size */
Guido van Rossum49d6b072006-08-17 21:11:47 +00005037 "striterator", /* tp_name */
5038 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005039 0, /* tp_itemsize */
5040 /* methods */
5041 (destructor)striter_dealloc, /* tp_dealloc */
5042 0, /* tp_print */
5043 0, /* tp_getattr */
5044 0, /* tp_setattr */
5045 0, /* tp_compare */
5046 0, /* tp_repr */
5047 0, /* tp_as_number */
5048 0, /* tp_as_sequence */
5049 0, /* tp_as_mapping */
5050 0, /* tp_hash */
5051 0, /* tp_call */
5052 0, /* tp_str */
5053 PyObject_GenericGetAttr, /* tp_getattro */
5054 0, /* tp_setattro */
5055 0, /* tp_as_buffer */
5056 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5057 0, /* tp_doc */
5058 (traverseproc)striter_traverse, /* tp_traverse */
5059 0, /* tp_clear */
5060 0, /* tp_richcompare */
5061 0, /* tp_weaklistoffset */
5062 PyObject_SelfIter, /* tp_iter */
5063 (iternextfunc)striter_next, /* tp_iternext */
5064 striter_methods, /* tp_methods */
5065 0,
5066};
5067
5068static PyObject *
5069str_iter(PyObject *seq)
5070{
5071 striterobject *it;
5072
5073 if (!PyString_Check(seq)) {
5074 PyErr_BadInternalCall();
5075 return NULL;
5076 }
5077 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5078 if (it == NULL)
5079 return NULL;
5080 it->it_index = 0;
5081 Py_INCREF(seq);
5082 it->it_seq = (PyStringObject *)seq;
5083 _PyObject_GC_TRACK(it);
5084 return (PyObject *)it;
5085}