blob: 7a35974bc204c7e8049d0faac6974cacfd4e343e [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000417 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000490 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000510 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000522 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000609 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 && isxdigit(Py_CHARMASK(s[1]))) {
611 unsigned int x = 0;
612 c = Py_CHARMASK(*s);
613 s++;
614 if (isdigit(c))
615 x = c - '0';
616 else if (islower(c))
617 x = 10 + c - 'a';
618 else
619 x = 10 + c - 'A';
620 x = x << 4;
621 c = Py_CHARMASK(*s);
622 s++;
623 if (isdigit(c))
624 x += c - '0';
625 else if (islower(c))
626 x += 10 + c - 'a';
627 else
628 x += 10 + c - 'A';
629 *p++ = x;
630 break;
631 }
632 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000633 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000635 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000636 }
637 if (strcmp(errors, "replace") == 0) {
638 *p++ = '?';
639 } else if (strcmp(errors, "ignore") == 0)
640 /* do nothing */;
641 else {
642 PyErr_Format(PyExc_ValueError,
643 "decoding error; "
644 "unknown error handling code: %.400s",
645 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000646 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 default:
649 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000650 s--;
651 goto non_esc; /* an arbitry number of unescaped
652 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 }
654 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000655 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 return v;
658 failed:
659 Py_DECREF(v);
660 return NULL;
661}
662
Thomas Wouters477c8d52006-05-27 19:21:47 +0000663/* -------------------------------------------------------------------- */
664/* object api */
665
Martin v. Löwis18e16552006-02-15 17:27:45 +0000666static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667string_getsize(register PyObject *op)
668{
669 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000670 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000671 if (PyString_AsStringAndSize(op, &s, &len))
672 return -1;
673 return len;
674}
675
676static /*const*/ char *
677string_getbuffer(register PyObject *op)
678{
679 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000680 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000681 if (PyString_AsStringAndSize(op, &s, &len))
682 return NULL;
683 return s;
684}
685
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000687PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000688{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000689 if (PyUnicode_Check(op)) {
690 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
691 if (!op)
692 return -1;
693 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694 if (!PyString_Check(op))
695 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000696 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697}
698
699/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000700PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000702 if (PyUnicode_Check(op)) {
703 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
704 if (!op)
705 return NULL;
706 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (!PyString_Check(op))
708 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000709 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000710}
711
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712int
713PyString_AsStringAndSize(register PyObject *obj,
714 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716{
717 if (s == NULL) {
718 PyErr_BadInternalCall();
719 return -1;
720 }
721
722 if (!PyString_Check(obj)) {
723 if (PyUnicode_Check(obj)) {
724 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
725 if (obj == NULL)
726 return -1;
727 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000728 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000729 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000731 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000732 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733 return -1;
734 }
735 }
736
737 *s = PyString_AS_STRING(obj);
738 if (len != NULL)
739 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000740 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 PyErr_SetString(PyExc_TypeError,
742 "expected string without null bytes");
743 return -1;
744 }
745 return 0;
746}
747
Thomas Wouters477c8d52006-05-27 19:21:47 +0000748/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749/* Methods */
750
Thomas Wouters477c8d52006-05-27 19:21:47 +0000751#define STRINGLIB_CHAR char
752
753#define STRINGLIB_CMP memcmp
754#define STRINGLIB_LEN PyString_GET_SIZE
755#define STRINGLIB_NEW PyString_FromStringAndSize
756#define STRINGLIB_STR PyString_AS_STRING
757
758#define STRINGLIB_EMPTY nullstring
759
760#include "stringlib/fastsearch.h"
761
762#include "stringlib/count.h"
763#include "stringlib/find.h"
764#include "stringlib/partition.h"
765
766
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000767PyObject *
768PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000770 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000771 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis5b222132007-06-10 09:51:05 +0000772 Py_ssize_t length = PyString_GET_SIZE(op);
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000773 size_t newsize = 3 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000774 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000775 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000776 PyErr_SetString(PyExc_OverflowError,
777 "string is too large to make repr");
778 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000779 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000780 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000781 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782 }
783 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000784 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000785 register Py_UNICODE c;
786 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000787 int quote;
788
Thomas Wouters7e474022000-07-16 12:04:32 +0000789 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000790 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000791 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000792 char *test, *start;
793 start = PyString_AS_STRING(op);
794 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000795 if (*test == '"') {
796 quote = '\''; /* switch back to single quote */
797 goto decided;
798 }
799 else if (*test == '\'')
800 quote = '"';
801 }
802 decided:
803 ;
804 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000805
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000806 *p++ = 's', *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000807 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000808 /* There's at least enough room for a hex escape
809 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000810 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000812 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000814 else if (c == '\t')
815 *p++ = '\\', *p++ = 't';
816 else if (c == '\n')
817 *p++ = '\\', *p++ = 'n';
818 else if (c == '\r')
819 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000820 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000821 *p++ = '\\';
822 *p++ = 'x';
823 *p++ = hexdigits[(c & 0xf0) >> 4];
824 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000825 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 else
827 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000829 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000832 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
833 Py_DECREF(v);
834 return NULL;
835 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000836 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838}
839
Guido van Rossum189f1df2001-05-01 16:51:53 +0000840static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841string_repr(PyObject *op)
842{
843 return PyString_Repr(op, 1);
844}
845
846static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000847string_str(PyObject *s)
848{
Tim Petersc9933152001-10-16 20:18:24 +0000849 assert(PyString_Check(s));
850 if (PyString_CheckExact(s)) {
851 Py_INCREF(s);
852 return s;
853 }
854 else {
855 /* Subtype -- return genuine string with the same value. */
856 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000857 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000858 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000859}
860
Martin v. Löwis18e16552006-02-15 17:27:45 +0000861static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000862string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000863{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000864 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865}
866
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000867static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000868string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000870 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000871 register PyStringObject *op;
872 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000873 if (PyUnicode_Check(bb))
874 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000875 if (PyBytes_Check(bb))
876 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000877 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000878 "cannot concatenate 'str8' and '%.200s' objects",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000879 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880 return NULL;
881 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000882#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000883 /* Optimize cases with empty left or right operand */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000884 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000885 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000886 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000887 Py_INCREF(bb);
888 return bb;
889 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890 Py_INCREF(a);
891 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000893 size = Py_Size(a) + Py_Size(b);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000894 if (size < 0) {
895 PyErr_SetString(PyExc_OverflowError,
896 "strings are too large to concat");
897 return NULL;
898 }
899
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000900 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000901 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000902 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000903 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000904 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000905 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000906 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000907 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
908 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000909 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000910 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911#undef b
912}
913
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000915string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000917 register Py_ssize_t i;
918 register Py_ssize_t j;
919 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000920 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000921 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922 if (n < 0)
923 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000924 /* watch out for overflows: the size can overflow int,
925 * and the # of bytes needed can overflow size_t
926 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000927 size = Py_Size(a) * n;
928 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000929 PyErr_SetString(PyExc_OverflowError,
930 "repeated string is too long");
931 return NULL;
932 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000933 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 Py_INCREF(a);
935 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936 }
Tim Peterse7c05322004-06-27 17:24:49 +0000937 nbytes = (size_t)size;
938 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000939 PyErr_SetString(PyExc_OverflowError,
940 "repeated string is too long");
941 return NULL;
942 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000943 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000944 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000945 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000947 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000948 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000949 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000950 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000951 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000952 memset(op->ob_sval, a->ob_sval[0] , n);
953 return (PyObject *) op;
954 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000955 i = 0;
956 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000957 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
958 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000959 }
960 while (i < size) {
961 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000962 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000963 i += j;
964 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966}
967
968/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
969
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000970static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000971string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000972 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +0000973 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974{
975 if (i < 0)
976 i = 0;
977 if (j < 0)
978 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000979 if (j > Py_Size(a))
980 j = Py_Size(a);
981 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000982 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983 Py_INCREF(a);
984 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985 }
986 if (j < i)
987 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000988 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989}
990
Guido van Rossum9284a572000-03-07 15:53:43 +0000991static int
Thomas Wouters477c8d52006-05-27 19:21:47 +0000992string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +0000993{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000994 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000995 if (PyUnicode_Check(sub_obj))
996 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000997 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000998 PyErr_Format(PyExc_TypeError,
999 "'in <string>' requires string as left operand, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001000 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001001 return -1;
1002 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001003 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001004
Thomas Wouters477c8d52006-05-27 19:21:47 +00001005 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001006}
1007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001009string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001011 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001012 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001013 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001014 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001015 return NULL;
1016 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001017 pchar = a->ob_sval[i];
1018 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001019 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001020 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001021 else {
1022#ifdef COUNT_ALLOCS
1023 one_strings++;
1024#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001025 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001026 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001027 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028}
1029
Martin v. Löwiscd353062001-05-24 16:56:35 +00001030static PyObject*
1031string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001032{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001033 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 Py_ssize_t len_a, len_b;
1035 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001036 PyObject *result;
1037
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001038 /* Make sure both arguments are strings. */
1039 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001040 result = Py_NotImplemented;
1041 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001042 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001043 if (a == b) {
1044 switch (op) {
1045 case Py_EQ:case Py_LE:case Py_GE:
1046 result = Py_True;
1047 goto out;
1048 case Py_NE:case Py_LT:case Py_GT:
1049 result = Py_False;
1050 goto out;
1051 }
1052 }
1053 if (op == Py_EQ) {
1054 /* Supporting Py_NE here as well does not save
1055 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001056 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001057 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001058 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001059 result = Py_True;
1060 } else {
1061 result = Py_False;
1062 }
1063 goto out;
1064 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001065 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001066 min_len = (len_a < len_b) ? len_a : len_b;
1067 if (min_len > 0) {
1068 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1069 if (c==0)
1070 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001071 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001072 c = 0;
1073 if (c == 0)
1074 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1075 switch (op) {
1076 case Py_LT: c = c < 0; break;
1077 case Py_LE: c = c <= 0; break;
1078 case Py_EQ: assert(0); break; /* unreachable */
1079 case Py_NE: c = c != 0; break;
1080 case Py_GT: c = c > 0; break;
1081 case Py_GE: c = c >= 0; break;
1082 default:
1083 result = Py_NotImplemented;
1084 goto out;
1085 }
1086 result = c ? Py_True : Py_False;
1087 out:
1088 Py_INCREF(result);
1089 return result;
1090}
1091
1092int
1093_PyString_Eq(PyObject *o1, PyObject *o2)
1094{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001095 PyStringObject *a = (PyStringObject*) o1;
1096 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001097 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001098 && *a->ob_sval == *b->ob_sval
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001099 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001100}
1101
Guido van Rossum9bfef441993-03-29 10:43:31 +00001102static long
Fred Drakeba096332000-07-09 07:04:36 +00001103string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001104{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001105 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001106 register unsigned char *p;
1107 register long x;
1108
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001109 if (a->ob_shash != -1)
1110 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001111 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001112 p = (unsigned char *) a->ob_sval;
1113 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001114 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001115 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001116 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001117 if (x == -1)
1118 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001119 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001120 return x;
1121}
1122
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001123static PyObject*
1124string_subscript(PyStringObject* self, PyObject* item)
1125{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001126 if (PyIndex_Check(item)) {
1127 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001128 if (i == -1 && PyErr_Occurred())
1129 return NULL;
1130 if (i < 0)
1131 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001132 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001133 }
1134 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001135 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001136 char* source_buf;
1137 char* result_buf;
1138 PyObject* result;
1139
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001140 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001141 PyString_GET_SIZE(self),
1142 &start, &stop, &step, &slicelength) < 0) {
1143 return NULL;
1144 }
1145
1146 if (slicelength <= 0) {
1147 return PyString_FromStringAndSize("", 0);
1148 }
1149 else {
1150 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001151 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001152 if (result_buf == NULL)
1153 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001154
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001155 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001156 cur += step, i++) {
1157 result_buf[i] = source_buf[cur];
1158 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001159
1160 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001161 slicelength);
1162 PyMem_Free(result_buf);
1163 return result;
1164 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001165 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001166 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001167 PyErr_Format(PyExc_TypeError,
1168 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001169 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001170 return NULL;
1171 }
1172}
1173
Martin v. Löwis18e16552006-02-15 17:27:45 +00001174static Py_ssize_t
1175string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001176{
1177 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001178 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001179 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001180 return -1;
1181 }
1182 *ptr = (void *)self->ob_sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001183 return Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001184}
1185
Martin v. Löwis18e16552006-02-15 17:27:45 +00001186static Py_ssize_t
1187string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001188{
Guido van Rossum045e6881997-09-08 18:30:11 +00001189 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001190 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001191 return -1;
1192}
1193
Martin v. Löwis18e16552006-02-15 17:27:45 +00001194static Py_ssize_t
1195string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001196{
1197 if ( lenp )
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001198 *lenp = Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001199 return 1;
1200}
1201
Martin v. Löwis18e16552006-02-15 17:27:45 +00001202static Py_ssize_t
1203string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001204{
1205 if ( index != 0 ) {
1206 PyErr_SetString(PyExc_SystemError,
1207 "accessing non-existent string segment");
1208 return -1;
1209 }
1210 *ptr = self->ob_sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001211 return Py_Size(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001212}
1213
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001214static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001215 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001216 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001217 (ssizeargfunc)string_repeat, /*sq_repeat*/
1218 (ssizeargfunc)string_item, /*sq_item*/
1219 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001220 0, /*sq_ass_item*/
1221 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001222 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001223};
1224
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001225static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001226 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001227 (binaryfunc)string_subscript,
1228 0,
1229};
1230
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001231static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001232 (readbufferproc)string_buffer_getreadbuf,
1233 (writebufferproc)string_buffer_getwritebuf,
1234 (segcountproc)string_buffer_getsegcount,
1235 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001236};
1237
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001238
1239
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* The method name is the format string with its 3-character "|O:" prefix
   skipped. */
#define STRIPNAME(i) (stripformat[(i)]+3)


/* Compare `length` bytes of `target` starting at `offset` with `pattern`:
   first byte, last byte, then everything after the first byte.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                    \
  ((target)[(offset)] == (pattern)[0] &&                                    \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&                \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to `list` as a new string.
   Relies on the caller's scope for `str`, `list` and an `onError` label.
   Expands to several statements, so it must not be used as the unbraced
   body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)                             \
    str = PyString_FromStringAndSize((data) + (left),               \
                                     (right) - (left));             \
    if (str == NULL)                                                \
        goto onError;                                               \
    if (PyList_Append(list, str)) {                                 \
        Py_DECREF(str);                                             \
        goto onError;                                               \
    }                                                               \
    else                                                            \
        Py_DECREF(str);

/* Store data[left:right] as element number `count` of `list` and increment
   `count`.  While count < MAX_PREALLOC the item is written straight into
   the preallocated slot; after that it is appended.
   Relies on the caller's scope for `str`, `list`, `count` and an `onError`
   label. */
#define SPLIT_ADD(data, left, right) {                              \
    str = PyString_FromStringAndSize((data) + (left),               \
                                     (right) - (left));             \
    if (str == NULL)                                                \
        goto onError;                                               \
    if (count < MAX_PREALLOC) {                                     \
        PyList_SET_ITEM(list, count, str);                          \
    } else {                                                        \
        if (PyList_Append(list, str)) {                             \
            Py_DECREF(str);                                         \
            goto onError;                                           \
        }                                                           \
        else                                                        \
            Py_DECREF(str);                                         \
    }                                                               \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Cursor helpers: advance (or, for the R* forms, retreat) index `i` over a
   run of whitespace / non-whitespace bytes in `s`.  `i` is modified in
   place and must be an lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1306
1307Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001308split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001310 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001311 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001312 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001313
1314 if (list == NULL)
1315 return NULL;
1316
Thomas Wouters477c8d52006-05-27 19:21:47 +00001317 i = j = 0;
1318
1319 while (maxsplit-- > 0) {
1320 SKIP_SPACE(s, i, len);
1321 if (i==len) break;
1322 j = i; i++;
1323 SKIP_NONSPACE(s, i, len);
1324 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001325 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001326
1327 if (i < len) {
1328 /* Only occurs when maxsplit was reached */
1329 /* Skip any remaining whitespace and copy to end of string */
1330 SKIP_SPACE(s, i, len);
1331 if (i != len)
1332 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001333 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001334 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001336 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337 Py_DECREF(list);
1338 return NULL;
1339}
1340
Thomas Wouters477c8d52006-05-27 19:21:47 +00001341Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001342split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001343{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001344 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001345 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001346 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001347
1348 if (list == NULL)
1349 return NULL;
1350
Thomas Wouters477c8d52006-05-27 19:21:47 +00001351 i = j = 0;
1352 while ((j < len) && (maxcount-- > 0)) {
1353 for(; j<len; j++) {
1354 /* I found that using memchr makes no difference */
1355 if (s[j] == ch) {
1356 SPLIT_ADD(s, i, j);
1357 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001358 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001359 }
1360 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001361 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001362 if (i <= len) {
1363 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001364 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001365 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001366 return list;
1367
1368 onError:
1369 Py_DECREF(list);
1370 return NULL;
1371}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001373PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374"S.split([sep [,maxsplit]]) -> list of strings\n\
1375\n\
1376Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001377delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001378splits are done. If sep is not specified or is None, any\n\
1379whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001380
1381static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001382string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001384 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001385 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001386 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001387 PyObject *list, *str, *subobj = Py_None;
1388#ifdef USE_FAST
1389 Py_ssize_t pos;
1390#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001392 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001394 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001395 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001396 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001397 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 if (PyString_Check(subobj)) {
1399 sub = PyString_AS_STRING(subobj);
1400 n = PyString_GET_SIZE(subobj);
1401 }
1402 else if (PyUnicode_Check(subobj))
1403 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1404 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1405 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001406
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407 if (n == 0) {
1408 PyErr_SetString(PyExc_ValueError, "empty separator");
1409 return NULL;
1410 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001411 else if (n == 1)
1412 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413
Thomas Wouters477c8d52006-05-27 19:21:47 +00001414 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 if (list == NULL)
1416 return NULL;
1417
Thomas Wouters477c8d52006-05-27 19:21:47 +00001418#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001420 while (maxsplit-- > 0) {
1421 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1422 if (pos < 0)
1423 break;
1424 j = i+pos;
1425 SPLIT_ADD(s, i, j);
1426 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001427 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001428#else
1429 i = j = 0;
1430 while ((j+n <= len) && (maxsplit-- > 0)) {
1431 for (; j+n <= len; j++) {
1432 if (Py_STRING_MATCH(s, j, sub, n)) {
1433 SPLIT_ADD(s, i, j);
1434 i = j = j + n;
1435 break;
1436 }
1437 }
1438 }
1439#endif
1440 SPLIT_ADD(s, i, len);
1441 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001442 return list;
1443
Thomas Wouters477c8d52006-05-27 19:21:47 +00001444 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001445 Py_DECREF(list);
1446 return NULL;
1447}
1448
Thomas Wouters477c8d52006-05-27 19:21:47 +00001449PyDoc_STRVAR(partition__doc__,
1450"S.partition(sep) -> (head, sep, tail)\n\
1451\n\
1452Searches for the separator sep in S, and returns the part before it,\n\
1453the separator itself, and the part after it. If the separator is not\n\
1454found, returns S and two empty strings.");
1455
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001456static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001457string_partition(PyStringObject *self, PyObject *sep_obj)
1458{
1459 const char *sep;
1460 Py_ssize_t sep_len;
1461
1462 if (PyString_Check(sep_obj)) {
1463 sep = PyString_AS_STRING(sep_obj);
1464 sep_len = PyString_GET_SIZE(sep_obj);
1465 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001466 else if (PyUnicode_Check(sep_obj))
1467 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001468 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1469 return NULL;
1470
1471 return stringlib_partition(
1472 (PyObject*) self,
1473 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1474 sep_obj, sep, sep_len
1475 );
1476}
1477
1478PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001479"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001480\n\
1481Searches for the separator sep in S, starting at the end of S, and returns\n\
1482the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001483separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001484
1485static PyObject *
1486string_rpartition(PyStringObject *self, PyObject *sep_obj)
1487{
1488 const char *sep;
1489 Py_ssize_t sep_len;
1490
1491 if (PyString_Check(sep_obj)) {
1492 sep = PyString_AS_STRING(sep_obj);
1493 sep_len = PyString_GET_SIZE(sep_obj);
1494 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001495 else if (PyUnicode_Check(sep_obj))
1496 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001497 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1498 return NULL;
1499
1500 return stringlib_rpartition(
1501 (PyObject*) self,
1502 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1503 sep_obj, sep, sep_len
1504 );
1505}
1506
1507Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001508rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001509{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001510 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001511 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001512 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001513
1514 if (list == NULL)
1515 return NULL;
1516
Thomas Wouters477c8d52006-05-27 19:21:47 +00001517 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001518
Thomas Wouters477c8d52006-05-27 19:21:47 +00001519 while (maxsplit-- > 0) {
1520 RSKIP_SPACE(s, i);
1521 if (i<0) break;
1522 j = i; i--;
1523 RSKIP_NONSPACE(s, i);
1524 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001525 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001526 if (i >= 0) {
1527 /* Only occurs when maxsplit was reached */
1528 /* Skip any remaining whitespace and copy to beginning of string */
1529 RSKIP_SPACE(s, i);
1530 if (i >= 0)
1531 SPLIT_ADD(s, 0, i + 1);
1532
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001533 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001534 FIX_PREALLOC_SIZE(list);
1535 if (PyList_Reverse(list) < 0)
1536 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001537 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001538 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001539 Py_DECREF(list);
1540 return NULL;
1541}
1542
Thomas Wouters477c8d52006-05-27 19:21:47 +00001543Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001544rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001545{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001546 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001547 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001548 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001549
1550 if (list == NULL)
1551 return NULL;
1552
Thomas Wouters477c8d52006-05-27 19:21:47 +00001553 i = j = len - 1;
1554 while ((i >= 0) && (maxcount-- > 0)) {
1555 for (; i >= 0; i--) {
1556 if (s[i] == ch) {
1557 SPLIT_ADD(s, i + 1, j + 1);
1558 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001559 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001560 }
1561 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001562 }
1563 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001564 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001565 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001566 FIX_PREALLOC_SIZE(list);
1567 if (PyList_Reverse(list) < 0)
1568 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001569 return list;
1570
1571 onError:
1572 Py_DECREF(list);
1573 return NULL;
1574}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001575
1576PyDoc_STRVAR(rsplit__doc__,
1577"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1578\n\
1579Return a list of the words in the string S, using sep as the\n\
1580delimiter string, starting at the end of the string and working\n\
1581to the front. If maxsplit is given, at most maxsplit splits are\n\
1582done. If sep is not specified or is None, any whitespace string\n\
1583is a separator.");
1584
1585static PyObject *
1586string_rsplit(PyStringObject *self, PyObject *args)
1587{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001588 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001589 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001590 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001591 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001592
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001593 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001594 return NULL;
1595 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001596 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001597 if (subobj == Py_None)
1598 return rsplit_whitespace(s, len, maxsplit);
1599 if (PyString_Check(subobj)) {
1600 sub = PyString_AS_STRING(subobj);
1601 n = PyString_GET_SIZE(subobj);
1602 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001603 else if (PyUnicode_Check(subobj))
1604 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001605 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1606 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001607
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001608 if (n == 0) {
1609 PyErr_SetString(PyExc_ValueError, "empty separator");
1610 return NULL;
1611 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001612 else if (n == 1)
1613 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001614
Thomas Wouters477c8d52006-05-27 19:21:47 +00001615 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001616 if (list == NULL)
1617 return NULL;
1618
1619 j = len;
1620 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621
Thomas Wouters477c8d52006-05-27 19:21:47 +00001622 while ( (i >= 0) && (maxsplit-- > 0) ) {
1623 for (; i>=0; i--) {
1624 if (Py_STRING_MATCH(s, i, sub, n)) {
1625 SPLIT_ADD(s, i + n, j);
1626 j = i;
1627 i -= n;
1628 break;
1629 }
1630 }
1631 }
1632 SPLIT_ADD(s, 0, j);
1633 FIX_PREALLOC_SIZE(list);
1634 if (PyList_Reverse(list) < 0)
1635 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001636 return list;
1637
Thomas Wouters477c8d52006-05-27 19:21:47 +00001638onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001639 Py_DECREF(list);
1640 return NULL;
1641}
1642
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001644PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001645"S.join(sequence) -> string\n\
1646\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001647Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001648sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649
1650static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001651string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001652{
1653 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001654 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001655 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001657 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001658 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001659 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001660 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001661
Tim Peters19fe14e2001-01-19 03:03:47 +00001662 seq = PySequence_Fast(orig, "");
1663 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001664 return NULL;
1665 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001666
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001667 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001668 if (seqlen == 0) {
1669 Py_DECREF(seq);
1670 return PyString_FromString("");
1671 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001673 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001674 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1675 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001676 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001677 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001678 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001679 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001680
Raymond Hettinger674f2412004-08-23 23:23:54 +00001681 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001682 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001683 * Do a pre-pass to figure out the total amount of space we'll
1684 * need (sz), see whether any argument is absurd, and defer to
1685 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001686 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001687 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001688 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001689 item = PySequence_Fast_GET_ITEM(seq, i);
1690 if (!PyString_Check(item)){
1691 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001692 /* Defer to Unicode join.
1693 * CAUTION: There's no gurantee that the
1694 * original sequence can be iterated over
1695 * again, so we must pass seq here.
1696 */
1697 PyObject *result;
1698 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001699 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001700 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001701 }
1702 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001703 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001704 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001705 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001706 Py_DECREF(seq);
1707 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001708 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001709 sz += PyString_GET_SIZE(item);
1710 if (i != 0)
1711 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001712 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001713 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001714 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001715 Py_DECREF(seq);
1716 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001718 }
1719
1720 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001721 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001722 if (res == NULL) {
1723 Py_DECREF(seq);
1724 return NULL;
1725 }
1726
1727 /* Catenate everything. */
1728 p = PyString_AS_STRING(res);
1729 for (i = 0; i < seqlen; ++i) {
1730 size_t n;
1731 item = PySequence_Fast_GET_ITEM(seq, i);
1732 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001733 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001734 p += n;
1735 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001736 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001737 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001738 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001739 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001740
Jeremy Hylton49048292000-07-11 03:28:17 +00001741 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743}
1744
Tim Peters52e155e2001-06-16 05:42:57 +00001745PyObject *
1746_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001747{
Tim Petersa7259592001-06-16 05:11:17 +00001748 assert(sep != NULL && PyString_Check(sep));
1749 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001750 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001751}
1752
Thomas Wouters477c8d52006-05-27 19:21:47 +00001753Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001754string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001755{
1756 if (*end > len)
1757 *end = len;
1758 else if (*end < 0)
1759 *end += len;
1760 if (*end < 0)
1761 *end = 0;
1762 if (*start < 0)
1763 *start += len;
1764 if (*start < 0)
1765 *start = 0;
1766}
1767
Thomas Wouters477c8d52006-05-27 19:21:47 +00001768Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001769string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001771 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001772 const char *sub;
1773 Py_ssize_t sub_len;
1774 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775
Thomas Wouters477c8d52006-05-27 19:21:47 +00001776 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1777 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001778 return -2;
1779 if (PyString_Check(subobj)) {
1780 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001781 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001782 }
1783 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001784 return PyUnicode_Find(
1785 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001786 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001787 /* XXX - the "expected a character buffer object" is pretty
1788 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789 return -2;
1790
Thomas Wouters477c8d52006-05-27 19:21:47 +00001791 if (dir > 0)
1792 return stringlib_find_slice(
1793 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1794 sub, sub_len, start, end);
1795 else
1796 return stringlib_rfind_slice(
1797 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1798 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799}
1800
1801
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001802PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803"S.find(sub [,start [,end]]) -> int\n\
1804\n\
1805Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001806such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807arguments start and end are interpreted as in slice notation.\n\
1808\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001809Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810
1811static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001812string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001814 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815 if (result == -2)
1816 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001817 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818}
1819
1820
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001821PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822"S.index(sub [,start [,end]]) -> int\n\
1823\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001824Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825
1826static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001827string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001829 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830 if (result == -2)
1831 return NULL;
1832 if (result == -1) {
1833 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001834 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835 return NULL;
1836 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001837 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001838}
1839
1840
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001841PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842"S.rfind(sub [,start [,end]]) -> int\n\
1843\n\
1844Return the highest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001845such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846arguments start and end are interpreted as in slice notation.\n\
1847\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001848Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849
1850static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001851string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001853 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001854 if (result == -2)
1855 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001856 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857}
1858
1859
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001860PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861"S.rindex(sub [,start [,end]]) -> int\n\
1862\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001863Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864
1865static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001866string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001868 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001869 if (result == -2)
1870 return NULL;
1871 if (result == -1) {
1872 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001873 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001874 return NULL;
1875 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001876 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877}
1878
1879
Thomas Wouters477c8d52006-05-27 19:21:47 +00001880Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001881do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1882{
1883 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001884 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001885 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001886 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1887 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001888
1889 i = 0;
1890 if (striptype != RIGHTSTRIP) {
1891 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1892 i++;
1893 }
1894 }
1895
1896 j = len;
1897 if (striptype != LEFTSTRIP) {
1898 do {
1899 j--;
1900 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1901 j++;
1902 }
1903
1904 if (i == 0 && j == len && PyString_CheckExact(self)) {
1905 Py_INCREF(self);
1906 return (PyObject*)self;
1907 }
1908 else
1909 return PyString_FromStringAndSize(s+i, j-i);
1910}
1911
1912
Thomas Wouters477c8d52006-05-27 19:21:47 +00001913Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001914do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915{
1916 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001917 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919 i = 0;
1920 if (striptype != RIGHTSTRIP) {
1921 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1922 i++;
1923 }
1924 }
1925
1926 j = len;
1927 if (striptype != LEFTSTRIP) {
1928 do {
1929 j--;
1930 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1931 j++;
1932 }
1933
Tim Peters8fa5dd02001-09-12 02:18:30 +00001934 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935 Py_INCREF(self);
1936 return (PyObject*)self;
1937 }
1938 else
1939 return PyString_FromStringAndSize(s+i, j-i);
1940}
1941
1942
Thomas Wouters477c8d52006-05-27 19:21:47 +00001943Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001944do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1945{
1946 PyObject *sep = NULL;
1947
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001948 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001949 return NULL;
1950
1951 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001952 if (PyString_Check(sep))
1953 return do_xstrip(self, striptype, sep);
1954 else if (PyUnicode_Check(sep)) {
1955 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1956 PyObject *res;
1957 if (uniself==NULL)
1958 return NULL;
1959 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1960 striptype, sep);
1961 Py_DECREF(uniself);
1962 return res;
1963 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001964 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001965 "%s arg must be None or string",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001966 STRIPNAME(striptype));
1967 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001968 }
1969
1970 return do_strip(self, striptype);
1971}
1972
1973
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001974PyDoc_STRVAR(strip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001975"S.strip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976\n\
1977Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001978whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001979If chars is given and not None, remove characters in chars instead.\n\
1980If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981
1982static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001983string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001985 if (PyTuple_GET_SIZE(args) == 0)
1986 return do_strip(self, BOTHSTRIP); /* Common case */
1987 else
1988 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989}
1990
1991
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001992PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001993"S.lstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001995Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001996If chars is given and not None, remove characters in chars instead.\n\
1997If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998
1999static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002000string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002002 if (PyTuple_GET_SIZE(args) == 0)
2003 return do_strip(self, LEFTSTRIP); /* Common case */
2004 else
2005 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006}
2007
2008
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002009PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002010"S.rstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002012Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002013If chars is given and not None, remove characters in chars instead.\n\
2014If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015
2016static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002017string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002019 if (PyTuple_GET_SIZE(args) == 0)
2020 return do_strip(self, RIGHTSTRIP); /* Common case */
2021 else
2022 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023}
2024
2025
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002026PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027"S.lower() -> string\n\
2028\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002029Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002030
Thomas Wouters477c8d52006-05-27 19:21:47 +00002031/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2032#ifndef _tolower
2033#define _tolower tolower
2034#endif
2035
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002037string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002039 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002040 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002041 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002043 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002044 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002046
2047 s = PyString_AS_STRING(newobj);
2048
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002049 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002050
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002052 int c = Py_CHARMASK(s[i]);
2053 if (isupper(c))
2054 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002055 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002056
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002057 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058}
2059
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002060PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061"S.upper() -> string\n\
2062\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002063Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064
Thomas Wouters477c8d52006-05-27 19:21:47 +00002065#ifndef _toupper
2066#define _toupper toupper
2067#endif
2068
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002069static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002070string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002072 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002073 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002074 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002076 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002077 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002079
2080 s = PyString_AS_STRING(newobj);
2081
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002082 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002083
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002085 int c = Py_CHARMASK(s[i]);
2086 if (islower(c))
2087 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002089
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002090 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091}
2092
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002093PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002094"S.title() -> string\n\
2095\n\
2096Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002097characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098
2099static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002100string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101{
2102 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002103 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002104 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002105 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002106
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002107 newobj = PyString_FromStringAndSize(NULL, n);
2108 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002110 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111 for (i = 0; i < n; i++) {
2112 int c = Py_CHARMASK(*s++);
2113 if (islower(c)) {
2114 if (!previous_is_cased)
2115 c = toupper(c);
2116 previous_is_cased = 1;
2117 } else if (isupper(c)) {
2118 if (previous_is_cased)
2119 c = tolower(c);
2120 previous_is_cased = 1;
2121 } else
2122 previous_is_cased = 0;
2123 *s_new++ = c;
2124 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002125 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002126}
2127
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002128PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129"S.capitalize() -> string\n\
2130\n\
2131Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002132capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133
2134static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002135string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136{
2137 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002138 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002139 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002141 newobj = PyString_FromStringAndSize(NULL, n);
2142 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002144 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145 if (0 < n) {
2146 int c = Py_CHARMASK(*s++);
2147 if (islower(c))
2148 *s_new = toupper(c);
2149 else
2150 *s_new = c;
2151 s_new++;
2152 }
2153 for (i = 1; i < n; i++) {
2154 int c = Py_CHARMASK(*s++);
2155 if (isupper(c))
2156 *s_new = tolower(c);
2157 else
2158 *s_new = c;
2159 s_new++;
2160 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002161 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002162}
2163
2164
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002165PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166"S.count(sub[, start[, end]]) -> int\n\
2167\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002168Return the number of non-overlapping occurrences of substring sub in\n\
2169string S[start:end]. Optional arguments start and end are interpreted\n\
2170as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171
2172static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002173string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002175 PyObject *sub_obj;
2176 const char *str = PyString_AS_STRING(self), *sub;
2177 Py_ssize_t sub_len;
2178 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179
Thomas Wouters477c8d52006-05-27 19:21:47 +00002180 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2181 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002183
Thomas Wouters477c8d52006-05-27 19:21:47 +00002184 if (PyString_Check(sub_obj)) {
2185 sub = PyString_AS_STRING(sub_obj);
2186 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002188 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002189 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002190 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002191 if (count == -1)
2192 return NULL;
2193 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002194 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002195 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002196 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002197 return NULL;
2198
Thomas Wouters477c8d52006-05-27 19:21:47 +00002199 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002200
Thomas Wouters477c8d52006-05-27 19:21:47 +00002201 return PyInt_FromSsize_t(
2202 stringlib_count(str + start, end - start, sub, sub_len)
2203 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204}
2205
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002206PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207"S.swapcase() -> string\n\
2208\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002209Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002210converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211
2212static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002213string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214{
2215 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002216 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002217 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002219 newobj = PyString_FromStringAndSize(NULL, n);
2220 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002222 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223 for (i = 0; i < n; i++) {
2224 int c = Py_CHARMASK(*s++);
2225 if (islower(c)) {
2226 *s_new = toupper(c);
2227 }
2228 else if (isupper(c)) {
2229 *s_new = tolower(c);
2230 }
2231 else
2232 *s_new = c;
2233 s_new++;
2234 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002235 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236}
2237
2238
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002239PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240"S.translate(table [,deletechars]) -> string\n\
2241\n\
2242Return a copy of the string S, where all characters occurring\n\
2243in the optional argument deletechars are removed, and the\n\
2244remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002245translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246
2247static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002248string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002249{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002250 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002251 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002252 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002254 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002255 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256 PyObject *result;
2257 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002258 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002260 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002263
2264 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002265 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002266 tablen = PyString_GET_SIZE(tableobj);
2267 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002268 else if (tableobj == Py_None) {
2269 table = NULL;
2270 tablen = 256;
2271 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002273 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274 parameter; instead a mapping to None will cause characters
2275 to be deleted. */
2276 if (delobj != NULL) {
2277 PyErr_SetString(PyExc_TypeError,
2278 "deletions are implemented differently for unicode");
2279 return NULL;
2280 }
2281 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2282 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002283 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002284 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002285
Martin v. Löwis00b61272002-12-12 20:03:19 +00002286 if (tablen != 256) {
2287 PyErr_SetString(PyExc_ValueError,
2288 "translation table must be 256 characters long");
2289 return NULL;
2290 }
2291
Guido van Rossum4c08d552000-03-10 22:55:18 +00002292 if (delobj != NULL) {
2293 if (PyString_Check(delobj)) {
2294 del_table = PyString_AS_STRING(delobj);
2295 dellen = PyString_GET_SIZE(delobj);
2296 }
2297 else if (PyUnicode_Check(delobj)) {
2298 PyErr_SetString(PyExc_TypeError,
2299 "deletions are implemented differently for unicode");
2300 return NULL;
2301 }
2302 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2303 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 }
2305 else {
2306 del_table = NULL;
2307 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308 }
2309
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002310 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002311 result = PyString_FromStringAndSize((char *)NULL, inlen);
2312 if (result == NULL)
2313 return NULL;
2314 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002315 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316
Guido van Rossumd8faa362007-04-27 19:54:29 +00002317 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318 /* If no deletions are required, use faster code */
2319 for (i = inlen; --i >= 0; ) {
2320 c = Py_CHARMASK(*input++);
2321 if (Py_CHARMASK((*output++ = table[c])) != c)
2322 changed = 1;
2323 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002324 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325 return result;
2326 Py_DECREF(result);
2327 Py_INCREF(input_obj);
2328 return input_obj;
2329 }
2330
Guido van Rossumd8faa362007-04-27 19:54:29 +00002331 if (table == NULL) {
2332 for (i = 0; i < 256; i++)
2333 trans_table[i] = Py_CHARMASK(i);
2334 } else {
2335 for (i = 0; i < 256; i++)
2336 trans_table[i] = Py_CHARMASK(table[i]);
2337 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338
2339 for (i = 0; i < dellen; i++)
2340 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2341
2342 for (i = inlen; --i >= 0; ) {
2343 c = Py_CHARMASK(*input++);
2344 if (trans_table[c] != -1)
2345 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2346 continue;
2347 changed = 1;
2348 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002349 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350 Py_DECREF(result);
2351 Py_INCREF(input_obj);
2352 return input_obj;
2353 }
2354 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002355 if (inlen > 0)
2356 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357 return result;
2358}
2359
2360
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len
   bytes of target, or NULL when there is none (thin memchr wrapper). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368
Thomas Wouters477c8d52006-05-27 19:21:47 +00002369/* String ops must return a string. */
2370/* If the object is subclass of string, create a copy */
2371Py_LOCAL(PyStringObject *)
2372return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002374 if (PyString_CheckExact(self)) {
2375 Py_INCREF(self);
2376 return self;
2377 }
2378 return (PyStringObject *)PyString_FromStringAndSize(
2379 PyString_AS_STRING(self),
2380 PyString_GET_SIZE(self));
2381}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382
Thomas Wouters477c8d52006-05-27 19:21:47 +00002383Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002384countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002385{
2386 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002387 const char *start=target;
2388 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002389
Thomas Wouters477c8d52006-05-27 19:21:47 +00002390 while ( (start=findchar(start, end-start, c)) != NULL ) {
2391 count++;
2392 if (count >= maxcount)
2393 break;
2394 start += 1;
2395 }
2396 return count;
2397}
2398
2399Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002400findstring(const char *target, Py_ssize_t target_len,
2401 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002402 Py_ssize_t start,
2403 Py_ssize_t end,
2404 int direction)
2405{
2406 if (start < 0) {
2407 start += target_len;
2408 if (start < 0)
2409 start = 0;
2410 }
2411 if (end > target_len) {
2412 end = target_len;
2413 } else if (end < 0) {
2414 end += target_len;
2415 if (end < 0)
2416 end = 0;
2417 }
2418
2419 /* zero-length substrings always match at the first attempt */
2420 if (pattern_len == 0)
2421 return (direction > 0) ? start : end;
2422
2423 end -= pattern_len;
2424
2425 if (direction < 0) {
2426 for (; end >= start; end--)
2427 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2428 return end;
2429 } else {
2430 for (; start <= end; start++)
2431 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2432 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433 }
2434 return -1;
2435}
2436
Thomas Wouters477c8d52006-05-27 19:21:47 +00002437Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002438countstring(const char *target, Py_ssize_t target_len,
2439 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002440 Py_ssize_t start,
2441 Py_ssize_t end,
2442 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002444 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002445
Thomas Wouters477c8d52006-05-27 19:21:47 +00002446 if (start < 0) {
2447 start += target_len;
2448 if (start < 0)
2449 start = 0;
2450 }
2451 if (end > target_len) {
2452 end = target_len;
2453 } else if (end < 0) {
2454 end += target_len;
2455 if (end < 0)
2456 end = 0;
2457 }
2458
2459 /* zero-length substrings match everywhere */
2460 if (pattern_len == 0 || maxcount == 0) {
2461 if (target_len+1 < maxcount)
2462 return target_len+1;
2463 return maxcount;
2464 }
2465
2466 end -= pattern_len;
2467 if (direction < 0) {
2468 for (; (end >= start); end--)
2469 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2470 count++;
2471 if (--maxcount <= 0) break;
2472 end -= pattern_len-1;
2473 }
2474 } else {
2475 for (; (start <= end); start++)
2476 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2477 count++;
2478 if (--maxcount <= 0)
2479 break;
2480 start += pattern_len-1;
2481 }
2482 }
2483 return count;
2484}
2485
2486
2487/* Algorithms for different cases of string replacement */
2488
2489/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2490Py_LOCAL(PyStringObject *)
2491replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002492 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002493 Py_ssize_t maxcount)
2494{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002495 char *self_s, *result_s;
2496 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002497 Py_ssize_t count, i, product;
2498 PyStringObject *result;
2499
2500 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002501
Thomas Wouters477c8d52006-05-27 19:21:47 +00002502 /* 1 at the end plus 1 after every character */
2503 count = self_len+1;
2504 if (maxcount < count)
2505 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002506
Thomas Wouters477c8d52006-05-27 19:21:47 +00002507 /* Check for overflow */
2508 /* result_len = count * to_len + self_len; */
2509 product = count * to_len;
2510 if (product / to_len != count) {
2511 PyErr_SetString(PyExc_OverflowError,
2512 "replace string is too long");
2513 return NULL;
2514 }
2515 result_len = product + self_len;
2516 if (result_len < 0) {
2517 PyErr_SetString(PyExc_OverflowError,
2518 "replace string is too long");
2519 return NULL;
2520 }
2521
2522 if (! (result = (PyStringObject *)
2523 PyString_FromStringAndSize(NULL, result_len)) )
2524 return NULL;
2525
2526 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002527 result_s = PyString_AS_STRING(result);
2528
2529 /* TODO: special case single character, which doesn't need memcpy */
2530
2531 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002532 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002533 result_s += to_len;
2534 count -= 1;
2535
2536 for (i=0; i<count; i++) {
2537 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002538 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002539 result_s += to_len;
2540 }
2541
2542 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002543 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002544
2545 return result;
2546}
2547
2548/* Special case for deleting a single character */
2549/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2550Py_LOCAL(PyStringObject *)
2551replace_delete_single_character(PyStringObject *self,
2552 char from_c, Py_ssize_t maxcount)
2553{
2554 char *self_s, *result_s;
2555 char *start, *next, *end;
2556 Py_ssize_t self_len, result_len;
2557 Py_ssize_t count;
2558 PyStringObject *result;
2559
2560 self_len = PyString_GET_SIZE(self);
2561 self_s = PyString_AS_STRING(self);
2562
2563 count = countchar(self_s, self_len, from_c, maxcount);
2564 if (count == 0) {
2565 return return_self(self);
2566 }
2567
2568 result_len = self_len - count; /* from_len == 1 */
2569 assert(result_len>=0);
2570
2571 if ( (result = (PyStringObject *)
2572 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2573 return NULL;
2574 result_s = PyString_AS_STRING(result);
2575
2576 start = self_s;
2577 end = self_s + self_len;
2578 while (count-- > 0) {
2579 next = findchar(start, end-start, from_c);
2580 if (next == NULL)
2581 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002582 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002583 result_s += (next-start);
2584 start = next+1;
2585 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002586 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002587
Thomas Wouters477c8d52006-05-27 19:21:47 +00002588 return result;
2589}
2590
2591/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2592
2593Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002594replace_delete_substring(PyStringObject *self,
2595 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002596 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002597 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002598 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002599 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002600 Py_ssize_t count, offset;
2601 PyStringObject *result;
2602
2603 self_len = PyString_GET_SIZE(self);
2604 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002605
2606 count = countstring(self_s, self_len,
2607 from_s, from_len,
2608 0, self_len, 1,
2609 maxcount);
2610
2611 if (count == 0) {
2612 /* no matches */
2613 return return_self(self);
2614 }
2615
2616 result_len = self_len - (count * from_len);
2617 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002618
Thomas Wouters477c8d52006-05-27 19:21:47 +00002619 if ( (result = (PyStringObject *)
2620 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2621 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002622
Thomas Wouters477c8d52006-05-27 19:21:47 +00002623 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002624
Thomas Wouters477c8d52006-05-27 19:21:47 +00002625 start = self_s;
2626 end = self_s + self_len;
2627 while (count-- > 0) {
2628 offset = findstring(start, end-start,
2629 from_s, from_len,
2630 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002631 if (offset == -1)
2632 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002633 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002634
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002635 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002636
Thomas Wouters477c8d52006-05-27 19:21:47 +00002637 result_s += (next-start);
2638 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002639 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002640 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002641 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002642}
2643
Thomas Wouters477c8d52006-05-27 19:21:47 +00002644/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2645Py_LOCAL(PyStringObject *)
2646replace_single_character_in_place(PyStringObject *self,
2647 char from_c, char to_c,
2648 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002649{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002650 char *self_s, *result_s, *start, *end, *next;
2651 Py_ssize_t self_len;
2652 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002653
Thomas Wouters477c8d52006-05-27 19:21:47 +00002654 /* The result string will be the same size */
2655 self_s = PyString_AS_STRING(self);
2656 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002657
Thomas Wouters477c8d52006-05-27 19:21:47 +00002658 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002659
Thomas Wouters477c8d52006-05-27 19:21:47 +00002660 if (next == NULL) {
2661 /* No matches; return the original string */
2662 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002663 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002664
Thomas Wouters477c8d52006-05-27 19:21:47 +00002665 /* Need to make a new string */
2666 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2667 if (result == NULL)
2668 return NULL;
2669 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002670 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002671
Thomas Wouters477c8d52006-05-27 19:21:47 +00002672 /* change everything in-place, starting with this one */
2673 start = result_s + (next-self_s);
2674 *start = to_c;
2675 start++;
2676 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002677
Thomas Wouters477c8d52006-05-27 19:21:47 +00002678 while (--maxcount > 0) {
2679 next = findchar(start, end-start, from_c);
2680 if (next == NULL)
2681 break;
2682 *next = to_c;
2683 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002684 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002685
Thomas Wouters477c8d52006-05-27 19:21:47 +00002686 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002687}
2688
Thomas Wouters477c8d52006-05-27 19:21:47 +00002689/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2690Py_LOCAL(PyStringObject *)
2691replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002692 const char *from_s, Py_ssize_t from_len,
2693 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002694 Py_ssize_t maxcount)
2695{
2696 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002697 char *self_s;
2698 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002699 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002700
Thomas Wouters477c8d52006-05-27 19:21:47 +00002701 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002702
Thomas Wouters477c8d52006-05-27 19:21:47 +00002703 self_s = PyString_AS_STRING(self);
2704 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002705
Thomas Wouters477c8d52006-05-27 19:21:47 +00002706 offset = findstring(self_s, self_len,
2707 from_s, from_len,
2708 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002709 if (offset == -1) {
2710 /* No matches; return the original string */
2711 return return_self(self);
2712 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002713
Thomas Wouters477c8d52006-05-27 19:21:47 +00002714 /* Need to make a new string */
2715 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2716 if (result == NULL)
2717 return NULL;
2718 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002719 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002720
Thomas Wouters477c8d52006-05-27 19:21:47 +00002721 /* change everything in-place, starting with this one */
2722 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002723 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002724 start += from_len;
2725 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002726
Thomas Wouters477c8d52006-05-27 19:21:47 +00002727 while ( --maxcount > 0) {
2728 offset = findstring(start, end-start,
2729 from_s, from_len,
2730 0, end-start, FORWARD);
2731 if (offset==-1)
2732 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002733 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002734 start += offset+from_len;
2735 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002736
Thomas Wouters477c8d52006-05-27 19:21:47 +00002737 return result;
2738}
2739
2740/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2741Py_LOCAL(PyStringObject *)
2742replace_single_character(PyStringObject *self,
2743 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002744 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002745 Py_ssize_t maxcount)
2746{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002747 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002748 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002749 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002750 Py_ssize_t count, product;
2751 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002752
Thomas Wouters477c8d52006-05-27 19:21:47 +00002753 self_s = PyString_AS_STRING(self);
2754 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002755
Thomas Wouters477c8d52006-05-27 19:21:47 +00002756 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002757 if (count == 0) {
2758 /* no matches, return unchanged */
2759 return return_self(self);
2760 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002761
Thomas Wouters477c8d52006-05-27 19:21:47 +00002762 /* use the difference between current and new, hence the "-1" */
2763 /* result_len = self_len + count * (to_len-1) */
2764 product = count * (to_len-1);
2765 if (product / (to_len-1) != count) {
2766 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2767 return NULL;
2768 }
2769 result_len = self_len + product;
2770 if (result_len < 0) {
2771 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2772 return NULL;
2773 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002774
Thomas Wouters477c8d52006-05-27 19:21:47 +00002775 if ( (result = (PyStringObject *)
2776 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2777 return NULL;
2778 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002779
Thomas Wouters477c8d52006-05-27 19:21:47 +00002780 start = self_s;
2781 end = self_s + self_len;
2782 while (count-- > 0) {
2783 next = findchar(start, end-start, from_c);
2784 if (next == NULL)
2785 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002786
Thomas Wouters477c8d52006-05-27 19:21:47 +00002787 if (next == start) {
2788 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002789 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002790 result_s += to_len;
2791 start += 1;
2792 } else {
2793 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002794 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002795 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002796 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002797 result_s += to_len;
2798 start = next+1;
2799 }
2800 }
2801 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002802 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002803
Thomas Wouters477c8d52006-05-27 19:21:47 +00002804 return result;
2805}
2806
2807/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2808Py_LOCAL(PyStringObject *)
2809replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002810 const char *from_s, Py_ssize_t from_len,
2811 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002812 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002813 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002814 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002815 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002816 Py_ssize_t count, offset, product;
2817 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002818
Thomas Wouters477c8d52006-05-27 19:21:47 +00002819 self_s = PyString_AS_STRING(self);
2820 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002821
Thomas Wouters477c8d52006-05-27 19:21:47 +00002822 count = countstring(self_s, self_len,
2823 from_s, from_len,
2824 0, self_len, FORWARD, maxcount);
2825 if (count == 0) {
2826 /* no matches, return unchanged */
2827 return return_self(self);
2828 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002829
Thomas Wouters477c8d52006-05-27 19:21:47 +00002830 /* Check for overflow */
2831 /* result_len = self_len + count * (to_len-from_len) */
2832 product = count * (to_len-from_len);
2833 if (product / (to_len-from_len) != count) {
2834 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2835 return NULL;
2836 }
2837 result_len = self_len + product;
2838 if (result_len < 0) {
2839 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2840 return NULL;
2841 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002842
Thomas Wouters477c8d52006-05-27 19:21:47 +00002843 if ( (result = (PyStringObject *)
2844 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2845 return NULL;
2846 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002847
Thomas Wouters477c8d52006-05-27 19:21:47 +00002848 start = self_s;
2849 end = self_s + self_len;
2850 while (count-- > 0) {
2851 offset = findstring(start, end-start,
2852 from_s, from_len,
2853 0, end-start, FORWARD);
2854 if (offset == -1)
2855 break;
2856 next = start+offset;
2857 if (next == start) {
2858 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002859 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002860 result_s += to_len;
2861 start += from_len;
2862 } else {
2863 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002864 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002865 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002866 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002867 result_s += to_len;
2868 start = next+from_len;
2869 }
2870 }
2871 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002872 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002873
Thomas Wouters477c8d52006-05-27 19:21:47 +00002874 return result;
2875}
2876
2877
2878Py_LOCAL(PyStringObject *)
2879replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002880 const char *from_s, Py_ssize_t from_len,
2881 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002882 Py_ssize_t maxcount)
2883{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002884 if (maxcount < 0) {
2885 maxcount = PY_SSIZE_T_MAX;
2886 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2887 /* nothing to do; return the original string */
2888 return return_self(self);
2889 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002890
Thomas Wouters477c8d52006-05-27 19:21:47 +00002891 if (maxcount == 0 ||
2892 (from_len == 0 && to_len == 0)) {
2893 /* nothing to do; return the original string */
2894 return return_self(self);
2895 }
2896
2897 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002898
Thomas Wouters477c8d52006-05-27 19:21:47 +00002899 if (from_len == 0) {
2900 /* insert the 'to' string everywhere. */
2901 /* >>> "Python".replace("", ".") */
2902 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002903 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002904 }
2905
2906 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2907 /* point for an empty self string to generate a non-empty string */
2908 /* Special case so the remaining code always gets a non-empty string */
2909 if (PyString_GET_SIZE(self) == 0) {
2910 return return_self(self);
2911 }
2912
2913 if (to_len == 0) {
2914 /* delete all occurances of 'from' string */
2915 if (from_len == 1) {
2916 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002917 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002918 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002919 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002920 }
2921 }
2922
2923 /* Handle special case where both strings have the same length */
2924
2925 if (from_len == to_len) {
2926 if (from_len == 1) {
2927 return replace_single_character_in_place(
2928 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002929 from_s[0],
2930 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002931 maxcount);
2932 } else {
2933 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002934 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002935 }
2936 }
2937
2938 /* Otherwise use the more generic algorithms */
2939 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002940 return replace_single_character(self, from_s[0],
2941 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002942 } else {
2943 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002944 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002945 }
2946}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002948PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002949"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002950\n\
2951Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002952old replaced by new. If the optional argument count is\n\
2953given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002954
2955static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002956string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002957{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002958 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002959 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002960 const char *from_s, *to_s;
2961 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002962
Thomas Wouters477c8d52006-05-27 19:21:47 +00002963 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002964 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002965
Thomas Wouters477c8d52006-05-27 19:21:47 +00002966 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002967 from_s = PyString_AS_STRING(from);
2968 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002969 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002970 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002971 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002972 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002973 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002974 return NULL;
2975
Thomas Wouters477c8d52006-05-27 19:21:47 +00002976 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002977 to_s = PyString_AS_STRING(to);
2978 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002979 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002980 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002981 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002982 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002983 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002984 return NULL;
2985
Thomas Wouters477c8d52006-05-27 19:21:47 +00002986 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002987 from_s, from_len,
2988 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002989}
2990
Thomas Wouters477c8d52006-05-27 19:21:47 +00002991/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002992
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002993/* Matches the end (direction >= 0) or start (direction < 0) of self
2994 * against substr, using the start and end arguments. Returns
2995 * -1 on error, 0 if not found and 1 if found.
2996 */
2997Py_LOCAL(int)
2998_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2999 Py_ssize_t end, int direction)
3000{
3001 Py_ssize_t len = PyString_GET_SIZE(self);
3002 Py_ssize_t slen;
3003 const char* sub;
3004 const char* str;
3005
3006 if (PyString_Check(substr)) {
3007 sub = PyString_AS_STRING(substr);
3008 slen = PyString_GET_SIZE(substr);
3009 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003010 else if (PyUnicode_Check(substr))
3011 return PyUnicode_Tailmatch((PyObject *)self,
3012 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003013 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3014 return -1;
3015 str = PyString_AS_STRING(self);
3016
3017 string_adjust_indices(&start, &end, len);
3018
3019 if (direction < 0) {
3020 /* startswith */
3021 if (start+slen > len)
3022 return 0;
3023 } else {
3024 /* endswith */
3025 if (end-start < slen || start > len)
3026 return 0;
3027
3028 if (end-slen > start)
3029 start = end - slen;
3030 }
3031 if (end-start >= slen)
3032 return ! memcmp(str+start, sub, slen);
3033 return 0;
3034}
3035
3036
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003037PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003038"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003039\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003040Return True if S starts with the specified prefix, False otherwise.\n\
3041With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003042With optional end, stop comparing S at that position.\n\
3043prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003044
3045static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003046string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003047{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003048 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003049 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003050 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003051 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003052
Guido van Rossumc6821402000-05-08 14:08:05 +00003053 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3054 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003055 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003056 if (PyTuple_Check(subobj)) {
3057 Py_ssize_t i;
3058 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3059 result = _string_tailmatch(self,
3060 PyTuple_GET_ITEM(subobj, i),
3061 start, end, -1);
3062 if (result == -1)
3063 return NULL;
3064 else if (result) {
3065 Py_RETURN_TRUE;
3066 }
3067 }
3068 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003070 result = _string_tailmatch(self, subobj, start, end, -1);
3071 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003072 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003073 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003074 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003075}
3076
3077
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003078PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003079"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003080\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003081Return True if S ends with the specified suffix, False otherwise.\n\
3082With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003083With optional end, stop comparing S at that position.\n\
3084suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003085
3086static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003087string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003088{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003089 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003090 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003091 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003092 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003093
Guido van Rossumc6821402000-05-08 14:08:05 +00003094 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3095 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003096 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003097 if (PyTuple_Check(subobj)) {
3098 Py_ssize_t i;
3099 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3100 result = _string_tailmatch(self,
3101 PyTuple_GET_ITEM(subobj, i),
3102 start, end, +1);
3103 if (result == -1)
3104 return NULL;
3105 else if (result) {
3106 Py_RETURN_TRUE;
3107 }
3108 }
3109 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003110 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003111 result = _string_tailmatch(self, subobj, start, end, +1);
3112 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003113 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003114 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003115 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003116}
3117
3118
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003119PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003120"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003121\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003122Encodes S using the codec registered for encoding. encoding defaults\n\
3123to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003124handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003125a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3126'xmlcharrefreplace' as well as any other name registered with\n\
3127codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003128
3129static PyObject *
3130string_encode(PyStringObject *self, PyObject *args)
3131{
3132 char *encoding = NULL;
3133 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003134 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003135
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003136 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3137 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003138 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003139 if (v == NULL)
3140 goto onError;
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003141 if (!PyBytes_Check(v)) {
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003142 PyErr_Format(PyExc_TypeError,
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003143 "[str8] encoder did not return a bytes object "
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003144 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003145 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003146 Py_DECREF(v);
3147 return NULL;
3148 }
3149 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003150
3151 onError:
3152 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003153}
3154
3155
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003156PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003157"S.decode([encoding[,errors]]) -> object\n\
3158\n\
3159Decodes S using the codec registered for encoding. encoding defaults\n\
3160to the default encoding. errors may be given to set a different error\n\
3161handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003162a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3163as well as any other name registerd with codecs.register_error that is\n\
3164able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003165
3166static PyObject *
3167string_decode(PyStringObject *self, PyObject *args)
3168{
3169 char *encoding = NULL;
3170 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003171 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003172
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003173 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3174 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003175 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003176 if (v == NULL)
3177 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003178 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3179 PyErr_Format(PyExc_TypeError,
3180 "decoder did not return a string/unicode object "
3181 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003182 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003183 Py_DECREF(v);
3184 return NULL;
3185 }
3186 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003187
3188 onError:
3189 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003190}
3191
3192
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003193PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003194"S.expandtabs([tabsize]) -> string\n\
3195\n\
3196Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003197If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003198
3199static PyObject*
3200string_expandtabs(PyStringObject *self, PyObject *args)
3201{
3202 const char *e, *p;
3203 char *q;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003204 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003205 PyObject *u;
3206 int tabsize = 8;
3207
3208 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3209 return NULL;
3210
Thomas Wouters7e474022000-07-16 12:04:32 +00003211 /* First pass: determine size of output string */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003212 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003213 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3214 for (p = PyString_AS_STRING(self); p < e; p++)
3215 if (*p == '\t') {
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003216 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003217 j += tabsize - (j % tabsize);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003218 if (old_j > j) {
3219 PyErr_SetString(PyExc_OverflowError,
3220 "new string is too long");
3221 return NULL;
3222 }
3223 old_j = j;
3224 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003225 }
3226 else {
3227 j++;
3228 if (*p == '\n' || *p == '\r') {
3229 i += j;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003230 old_j = j = 0;
3231 if (i < 0) {
3232 PyErr_SetString(PyExc_OverflowError,
3233 "new string is too long");
3234 return NULL;
3235 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003236 }
3237 }
3238
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003239 if ((i + j) < 0) {
3240 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3241 return NULL;
3242 }
3243
Guido van Rossum4c08d552000-03-10 22:55:18 +00003244 /* Second pass: create output string and fill it */
3245 u = PyString_FromStringAndSize(NULL, i + j);
3246 if (!u)
3247 return NULL;
3248
3249 j = 0;
3250 q = PyString_AS_STRING(u);
3251
3252 for (p = PyString_AS_STRING(self); p < e; p++)
3253 if (*p == '\t') {
3254 if (tabsize > 0) {
3255 i = tabsize - (j % tabsize);
3256 j += i;
3257 while (i--)
3258 *q++ = ' ';
3259 }
3260 }
3261 else {
3262 j++;
3263 *q++ = *p;
3264 if (*p == '\n' || *p == '\r')
3265 j = 0;
3266 }
3267
3268 return u;
3269}
3270
Thomas Wouters477c8d52006-05-27 19:21:47 +00003271Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003272pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003273{
3274 PyObject *u;
3275
3276 if (left < 0)
3277 left = 0;
3278 if (right < 0)
3279 right = 0;
3280
Tim Peters8fa5dd02001-09-12 02:18:30 +00003281 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003282 Py_INCREF(self);
3283 return (PyObject *)self;
3284 }
3285
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003286 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003287 left + PyString_GET_SIZE(self) + right);
3288 if (u) {
3289 if (left)
3290 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003291 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003292 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003293 PyString_GET_SIZE(self));
3294 if (right)
3295 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3296 fill, right);
3297 }
3298
3299 return u;
3300}
3301
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003302PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003303"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003304"\n"
3305"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003306"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003307
3308static PyObject *
3309string_ljust(PyStringObject *self, PyObject *args)
3310{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003311 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003312 char fillchar = ' ';
3313
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003314 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003315 return NULL;
3316
Tim Peters8fa5dd02001-09-12 02:18:30 +00003317 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003318 Py_INCREF(self);
3319 return (PyObject*) self;
3320 }
3321
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003322 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003323}
3324
3325
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003326PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003327"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003328"\n"
3329"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003330"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003331
3332static PyObject *
3333string_rjust(PyStringObject *self, PyObject *args)
3334{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003335 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003336 char fillchar = ' ';
3337
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003338 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003339 return NULL;
3340
Tim Peters8fa5dd02001-09-12 02:18:30 +00003341 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003342 Py_INCREF(self);
3343 return (PyObject*) self;
3344 }
3345
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003346 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003347}
3348
3349
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003350PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003351"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003352"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003353"Return S centered in a string of length width. Padding is\n"
3354"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003355
3356static PyObject *
3357string_center(PyStringObject *self, PyObject *args)
3358{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003359 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003360 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003361 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003362
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003363 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003364 return NULL;
3365
Tim Peters8fa5dd02001-09-12 02:18:30 +00003366 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003367 Py_INCREF(self);
3368 return (PyObject*) self;
3369 }
3370
3371 marg = width - PyString_GET_SIZE(self);
3372 left = marg / 2 + (marg & width & 1);
3373
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003374 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003375}
3376
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003377PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003378"S.zfill(width) -> string\n"
3379"\n"
3380"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003381"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003382
3383static PyObject *
3384string_zfill(PyStringObject *self, PyObject *args)
3385{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003386 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003387 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003388 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003389 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003390
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003391 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003392 return NULL;
3393
3394 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003395 if (PyString_CheckExact(self)) {
3396 Py_INCREF(self);
3397 return (PyObject*) self;
3398 }
3399 else
3400 return PyString_FromStringAndSize(
3401 PyString_AS_STRING(self),
3402 PyString_GET_SIZE(self)
3403 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003404 }
3405
3406 fill = width - PyString_GET_SIZE(self);
3407
3408 s = pad(self, fill, 0, '0');
3409
3410 if (s == NULL)
3411 return NULL;
3412
3413 p = PyString_AS_STRING(s);
3414 if (p[fill] == '+' || p[fill] == '-') {
3415 /* move sign to beginning of string */
3416 p[0] = p[fill];
3417 p[fill] = '0';
3418 }
3419
3420 return (PyObject*) s;
3421}
3422
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003423PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003424"S.isspace() -> bool\n\
3425\n\
3426Return True if all characters in S are whitespace\n\
3427and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003428
3429static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003430string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431{
Fred Drakeba096332000-07-09 07:04:36 +00003432 register const unsigned char *p
3433 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003434 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003435
Guido van Rossum4c08d552000-03-10 22:55:18 +00003436 /* Shortcut for single character strings */
3437 if (PyString_GET_SIZE(self) == 1 &&
3438 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003439 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003441 /* Special case for empty strings */
3442 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003443 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003444
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445 e = p + PyString_GET_SIZE(self);
3446 for (; p < e; p++) {
3447 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003448 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003449 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003450 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003451}
3452
3453
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003454PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003455"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003456\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003457Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003458and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003459
3460static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003461string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003462{
Fred Drakeba096332000-07-09 07:04:36 +00003463 register const unsigned char *p
3464 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003465 register const unsigned char *e;
3466
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003467 /* Shortcut for single character strings */
3468 if (PyString_GET_SIZE(self) == 1 &&
3469 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003470 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003471
3472 /* Special case for empty strings */
3473 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003474 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003475
3476 e = p + PyString_GET_SIZE(self);
3477 for (; p < e; p++) {
3478 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003479 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003480 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003481 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003482}
3483
3484
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003485PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003486"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003487\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003488Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003489and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003490
3491static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003492string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003493{
Fred Drakeba096332000-07-09 07:04:36 +00003494 register const unsigned char *p
3495 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003496 register const unsigned char *e;
3497
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003498 /* Shortcut for single character strings */
3499 if (PyString_GET_SIZE(self) == 1 &&
3500 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003501 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003502
3503 /* Special case for empty strings */
3504 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003505 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003506
3507 e = p + PyString_GET_SIZE(self);
3508 for (; p < e; p++) {
3509 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003510 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003511 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003512 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003513}
3514
3515
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003516PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003517"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003519Return True if all characters in S are digits\n\
3520and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003521
3522static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003523string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003524{
Fred Drakeba096332000-07-09 07:04:36 +00003525 register const unsigned char *p
3526 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003527 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003528
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529 /* Shortcut for single character strings */
3530 if (PyString_GET_SIZE(self) == 1 &&
3531 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003532 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003533
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003534 /* Special case for empty strings */
3535 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003536 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003537
Guido van Rossum4c08d552000-03-10 22:55:18 +00003538 e = p + PyString_GET_SIZE(self);
3539 for (; p < e; p++) {
3540 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003541 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003542 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003543 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003544}
3545
3546
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003547PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003548"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003549\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003550Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003551at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003552
3553static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003554string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003555{
Fred Drakeba096332000-07-09 07:04:36 +00003556 register const unsigned char *p
3557 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003558 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003559 int cased;
3560
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561 /* Shortcut for single character strings */
3562 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003563 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003564
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003565 /* Special case for empty strings */
3566 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003567 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003568
Guido van Rossum4c08d552000-03-10 22:55:18 +00003569 e = p + PyString_GET_SIZE(self);
3570 cased = 0;
3571 for (; p < e; p++) {
3572 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003573 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003574 else if (!cased && islower(*p))
3575 cased = 1;
3576 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003577 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003578}
3579
3580
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003581PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003582"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003583\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003584Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003585at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003586
3587static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003588string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003589{
Fred Drakeba096332000-07-09 07:04:36 +00003590 register const unsigned char *p
3591 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003592 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003593 int cased;
3594
Guido van Rossum4c08d552000-03-10 22:55:18 +00003595 /* Shortcut for single character strings */
3596 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003597 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003598
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003599 /* Special case for empty strings */
3600 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003601 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003602
Guido van Rossum4c08d552000-03-10 22:55:18 +00003603 e = p + PyString_GET_SIZE(self);
3604 cased = 0;
3605 for (; p < e; p++) {
3606 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003607 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003608 else if (!cased && isupper(*p))
3609 cased = 1;
3610 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612}
3613
3614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003615PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003616"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003618Return True if S is a titlecased string and there is at least one\n\
3619character in S, i.e. uppercase characters may only follow uncased\n\
3620characters and lowercase characters only cased ones. Return False\n\
3621otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622
3623static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003624string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003625{
Fred Drakeba096332000-07-09 07:04:36 +00003626 register const unsigned char *p
3627 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003628 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629 int cased, previous_is_cased;
3630
Guido van Rossum4c08d552000-03-10 22:55:18 +00003631 /* Shortcut for single character strings */
3632 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003633 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003635 /* Special case for empty strings */
3636 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003638
Guido van Rossum4c08d552000-03-10 22:55:18 +00003639 e = p + PyString_GET_SIZE(self);
3640 cased = 0;
3641 previous_is_cased = 0;
3642 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003643 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644
3645 if (isupper(ch)) {
3646 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003647 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003648 previous_is_cased = 1;
3649 cased = 1;
3650 }
3651 else if (islower(ch)) {
3652 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003653 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654 previous_is_cased = 1;
3655 cased = 1;
3656 }
3657 else
3658 previous_is_cased = 0;
3659 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003660 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661}
3662
3663
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003664PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003665"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666\n\
3667Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003668Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003669is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671static PyObject*
3672string_splitlines(PyStringObject *self, PyObject *args)
3673{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003674 register Py_ssize_t i;
3675 register Py_ssize_t j;
3676 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003677 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678 PyObject *list;
3679 PyObject *str;
3680 char *data;
3681
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003682 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683 return NULL;
3684
3685 data = PyString_AS_STRING(self);
3686 len = PyString_GET_SIZE(self);
3687
Thomas Wouters477c8d52006-05-27 19:21:47 +00003688 /* This does not use the preallocated list because splitlines is
3689 usually run with hundreds of newlines. The overhead of
3690 switching between PyList_SET_ITEM and append causes about a
3691 2-3% slowdown for that common case. A smarter implementation
3692 could move the if check out, so the SET_ITEMs are done first
3693 and the appends only done when the prealloc buffer is full.
3694 That's too much work for little gain.*/
3695
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696 list = PyList_New(0);
3697 if (!list)
3698 goto onError;
3699
3700 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003701 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003702
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703 /* Find a line and append it */
3704 while (i < len && data[i] != '\n' && data[i] != '\r')
3705 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003706
3707 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003708 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709 if (i < len) {
3710 if (data[i] == '\r' && i + 1 < len &&
3711 data[i+1] == '\n')
3712 i += 2;
3713 else
3714 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003715 if (keepends)
3716 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003718 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719 j = i;
3720 }
3721 if (j < len) {
3722 SPLIT_APPEND(data, j, len);
3723 }
3724
3725 return list;
3726
3727 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003728 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729 return NULL;
3730}
3731
3732#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003733#undef SPLIT_ADD
3734#undef MAX_PREALLOC
3735#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003737static PyObject *
3738string_getnewargs(PyStringObject *v)
3739{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003740 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003741}
3742
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003743
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003744static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003745string_methods[] = {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003746 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3747 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003748 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003749 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3750 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003751 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3752 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3753 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3754 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3755 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3756 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3757 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003758 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3759 capitalize__doc__},
3760 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3761 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3762 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003763 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003764 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3765 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3766 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3767 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3768 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3769 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3770 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003771 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3772 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003773 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3774 startswith__doc__},
3775 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3776 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3777 swapcase__doc__},
3778 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3779 translate__doc__},
3780 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3781 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3782 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3783 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3784 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3785 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3786 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3787 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3788 expandtabs__doc__},
3789 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3790 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003791 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003792 {NULL, NULL} /* sentinel */
3793};
3794
Jeremy Hylton938ace62002-07-17 16:30:39 +00003795static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003796str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3797
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003798static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003799string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003800{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003801 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003802 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003803
Guido van Rossumae960af2001-08-30 03:11:59 +00003804 if (type != &PyString_Type)
3805 return str_subtype_new(type, args, kwds);
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003806 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
Tim Peters6d6c1a32001-08-02 04:15:00 +00003807 return NULL;
3808 if (x == NULL)
3809 return PyString_FromString("");
3810 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003811}
3812
Guido van Rossumae960af2001-08-30 03:11:59 +00003813static PyObject *
3814str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3815{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003816 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003817 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003818
3819 assert(PyType_IsSubtype(type, &PyString_Type));
3820 tmp = string_new(&PyString_Type, args, kwds);
3821 if (tmp == NULL)
3822 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003823 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003824 n = PyString_GET_SIZE(tmp);
3825 pnew = type->tp_alloc(type, n);
3826 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003827 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003828 ((PyStringObject *)pnew)->ob_shash =
3829 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003830 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003831 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003832 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003833 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003834}
3835
Guido van Rossumcacfc072002-05-24 19:01:59 +00003836static PyObject *
3837basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3838{
3839 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003840 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003841 return NULL;
3842}
3843
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003844static PyObject *
3845string_mod(PyObject *v, PyObject *w)
3846{
3847 if (!PyString_Check(v)) {
3848 Py_INCREF(Py_NotImplemented);
3849 return Py_NotImplemented;
3850 }
3851 return PyString_Format(v, w);
3852}
3853
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003854PyDoc_STRVAR(basestring_doc,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003855"Type basestring cannot be instantiated; it is the base for str8 and str.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003856
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003857static PyNumberMethods string_as_number = {
3858 0, /*nb_add*/
3859 0, /*nb_subtract*/
3860 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003861 string_mod, /*nb_remainder*/
3862};
3863
3864
Guido van Rossumcacfc072002-05-24 19:01:59 +00003865PyTypeObject PyBaseString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003866 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003867 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003868 0,
3869 0,
3870 0, /* tp_dealloc */
3871 0, /* tp_print */
3872 0, /* tp_getattr */
3873 0, /* tp_setattr */
3874 0, /* tp_compare */
3875 0, /* tp_repr */
3876 0, /* tp_as_number */
3877 0, /* tp_as_sequence */
3878 0, /* tp_as_mapping */
3879 0, /* tp_hash */
3880 0, /* tp_call */
3881 0, /* tp_str */
3882 0, /* tp_getattro */
3883 0, /* tp_setattro */
3884 0, /* tp_as_buffer */
3885 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3886 basestring_doc, /* tp_doc */
3887 0, /* tp_traverse */
3888 0, /* tp_clear */
3889 0, /* tp_richcompare */
3890 0, /* tp_weaklistoffset */
3891 0, /* tp_iter */
3892 0, /* tp_iternext */
3893 0, /* tp_methods */
3894 0, /* tp_members */
3895 0, /* tp_getset */
3896 &PyBaseObject_Type, /* tp_base */
3897 0, /* tp_dict */
3898 0, /* tp_descr_get */
3899 0, /* tp_descr_set */
3900 0, /* tp_dictoffset */
3901 0, /* tp_init */
3902 0, /* tp_alloc */
3903 basestring_new, /* tp_new */
3904 0, /* tp_free */
3905};
3906
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003907PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003908"str(object) -> string\n\
3909\n\
3910Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003911If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003912
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003913static PyObject *str_iter(PyObject *seq);
3914
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003915PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003916 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Walter Dörwald5d7a7002007-05-03 20:49:27 +00003917 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003918 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003919 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003920 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003921 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003922 0, /* tp_getattr */
3923 0, /* tp_setattr */
3924 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003925 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003926 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003927 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003928 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003929 (hashfunc)string_hash, /* tp_hash */
3930 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003931 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003932 PyObject_GenericGetAttr, /* tp_getattro */
3933 0, /* tp_setattro */
3934 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003935 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3936 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003937 string_doc, /* tp_doc */
3938 0, /* tp_traverse */
3939 0, /* tp_clear */
3940 (richcmpfunc)string_richcompare, /* tp_richcompare */
3941 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003942 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003943 0, /* tp_iternext */
3944 string_methods, /* tp_methods */
3945 0, /* tp_members */
3946 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003947 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003948 0, /* tp_dict */
3949 0, /* tp_descr_get */
3950 0, /* tp_descr_set */
3951 0, /* tp_dictoffset */
3952 0, /* tp_init */
3953 0, /* tp_alloc */
3954 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003955 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003956};
3957
3958void
Fred Drakeba096332000-07-09 07:04:36 +00003959PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003960{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003961 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003962 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003963 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003964 if (w == NULL || !PyString_Check(*pv)) {
3965 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003966 *pv = NULL;
3967 return;
3968 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003969 v = string_concat((PyStringObject *) *pv, w);
3970 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003971 *pv = v;
3972}
3973
Guido van Rossum013142a1994-08-30 08:19:36 +00003974void
Fred Drakeba096332000-07-09 07:04:36 +00003975PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003976{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003977 PyString_Concat(pv, w);
3978 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003979}
3980
3981
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003982/* The following function breaks the notion that strings are immutable:
3983 it changes the size of a string. We get away with this only if there
3984 is only one module referencing the object. You can also think of it
3985 as creating a new string object and destroying the old one, only
3986 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003987 already be known to some other part of the code...
3988 Note that if there's not enough memory to resize the string, the original
3989 string object at *pv is deallocated, *pv is set to NULL, an "out of
3990 memory" exception is set, and -1 is returned. Else (on success) 0 is
3991 returned, and the value in *pv may or may not be the same as on input.
3992 As always, an extra byte is allocated for a trailing \0 byte (newsize
3993 does *not* include that), and a trailing \0 byte is stored.
3994*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003995
3996int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003997_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003998{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003999 register PyObject *v;
4000 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004001 v = *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004002 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004003 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004004 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004005 Py_DECREF(v);
4006 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004007 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004008 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004009 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004010 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004011 _Py_ForgetReference(v);
4012 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004013 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004014 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004015 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004016 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004017 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004018 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004019 _Py_NewReference(*pv);
4020 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004021 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004022 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004023 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004024 return 0;
4025}
Guido van Rossume5372401993-03-16 12:15:04 +00004026
4027/* Helpers for formatstring */
4028
Thomas Wouters477c8d52006-05-27 19:21:47 +00004029Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004030getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004031{
Thomas Wouters977485d2006-02-16 15:59:12 +00004032 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004033 if (argidx < arglen) {
4034 (*p_argidx)++;
4035 if (arglen < 0)
4036 return args;
4037 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004038 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004039 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004040 PyErr_SetString(PyExc_TypeError,
4041 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004042 return NULL;
4043}
4044
/* Format flag bits; each one stands for the flag character shown. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
4057
Thomas Wouters477c8d52006-05-27 19:21:47 +00004058Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004059formatfloat(char *buf, size_t buflen, int flags,
4060 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004061{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004062 /* fmt = '%#.' + `prec` + `type`
4063 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004064 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004065 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004066 x = PyFloat_AsDouble(v);
4067 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004068 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004069 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004070 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004071 }
Guido van Rossume5372401993-03-16 12:15:04 +00004072 if (prec < 0)
4073 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004074 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4075 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004076 /* Worst case length calc to ensure no buffer overrun:
4077
4078 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004079 fmt = %#.<prec>g
4080 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004081 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004082 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004083
4084 'f' formats:
4085 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4086 len = 1 + 50 + 1 + prec = 52 + prec
4087
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004088 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004089 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004090
4091 */
Guido van Rossumb5a755e2007-07-18 18:15:48 +00004092 if (((type == 'g' || type == 'G') &&
4093 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004094 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004095 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004096 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004097 return -1;
4098 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004099 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4100 (flags&F_ALT) ? "#" : "",
4101 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004102 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004103 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004104}
4105
Tim Peters38fd5b62000-09-21 05:43:11 +00004106/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4107 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4108 * Python's regular ints.
4109 * Return value: a new PyString*, or NULL if error.
4110 * . *pbuf is set to point into it,
4111 * *plen set to the # of chars following that.
4112 * Caller must decref it when done using pbuf.
4113 * The string starting at *pbuf is of the form
4114 * "-"? ("0x" | "0X")? digit+
4115 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004116 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004117 * There will be at least prec digits, zero-filled on the left if
4118 * necessary to get that many.
4119 * val object to be converted
4120 * flags bitmask of format flags; only F_ALT is looked at
4121 * prec minimum number of digits; 0-fill on left if needed
4122 * type a character in [duoxX]; u acts the same as d
4123 *
4124 * CAUTION: o, x and X conversions on regular ints can never
4125 * produce a '-' sign, but can for Python's unbounded ints.
4126 */
4127PyObject*
4128_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4129 char **pbuf, int *plen)
4130{
4131 PyObject *result = NULL;
4132 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004133 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004134 int sign; /* 1 if '-', else 0 */
4135 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004136 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004137 int numdigits; /* len == numnondigits + numdigits */
4138 int numnondigits = 0;
4139
Guido van Rossumddefaf32007-01-14 03:31:43 +00004140 /* Avoid exceeding SSIZE_T_MAX */
4141 if (prec > PY_SSIZE_T_MAX-3) {
4142 PyErr_SetString(PyExc_OverflowError,
4143 "precision too large");
4144 return NULL;
4145 }
4146
4147
Tim Peters38fd5b62000-09-21 05:43:11 +00004148 switch (type) {
4149 case 'd':
4150 case 'u':
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004151 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004152 break;
4153 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004154 numnondigits = 2;
4155 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00004156 break;
4157 case 'x':
4158 case 'X':
4159 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004160 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00004161 break;
4162 default:
4163 assert(!"'type' not in [duoxX]");
4164 }
4165 if (!result)
4166 return NULL;
4167
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004168 buf = PyString_AsString(result);
4169 if (!buf) {
4170 Py_DECREF(result);
4171 return NULL;
4172 }
4173
Tim Peters38fd5b62000-09-21 05:43:11 +00004174 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004175 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004176 PyErr_BadInternalCall();
4177 return NULL;
4178 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004179 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004180 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004181 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4182 return NULL;
4183 }
4184 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004185 if (buf[len-1] == 'L') {
4186 --len;
4187 buf[len] = '\0';
4188 }
4189 sign = buf[0] == '-';
4190 numnondigits += sign;
4191 numdigits = len - numnondigits;
4192 assert(numdigits > 0);
4193
Tim Petersfff53252001-04-12 18:38:48 +00004194 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004195 if (((flags & F_ALT) == 0 &&
4196 (type == 'o' || type == 'x' || type == 'X'))) {
4197 assert(buf[sign] == '0');
4198 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
4199 buf[sign+1] == 'o');
4200 numnondigits -= 2;
4201 buf += 2;
4202 len -= 2;
4203 if (sign)
4204 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00004205 assert(len == numnondigits + numdigits);
4206 assert(numdigits > 0);
4207 }
4208
4209 /* Fill with leading zeroes to meet minimum width. */
4210 if (prec > numdigits) {
4211 PyObject *r1 = PyString_FromStringAndSize(NULL,
4212 numnondigits + prec);
4213 char *b1;
4214 if (!r1) {
4215 Py_DECREF(result);
4216 return NULL;
4217 }
4218 b1 = PyString_AS_STRING(r1);
4219 for (i = 0; i < numnondigits; ++i)
4220 *b1++ = *buf++;
4221 for (i = 0; i < prec - numdigits; i++)
4222 *b1++ = '0';
4223 for (i = 0; i < numdigits; i++)
4224 *b1++ = *buf++;
4225 *b1 = '\0';
4226 Py_DECREF(result);
4227 result = r1;
4228 buf = PyString_AS_STRING(result);
4229 len = numnondigits + prec;
4230 }
4231
4232 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004233 if (type == 'X') {
4234 /* Need to convert all lower case letters to upper case.
4235 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004236 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004237 if (buf[i] >= 'a' && buf[i] <= 'x')
4238 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004239 }
4240 *pbuf = buf;
4241 *plen = len;
4242 return result;
4243}
4244
Thomas Wouters477c8d52006-05-27 19:21:47 +00004245Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004246formatint(char *buf, size_t buflen, int flags,
4247 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004248{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004249 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004250 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4251 + 1 + 1 = 24 */
4252 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004253 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004254 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004255
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004256 x = PyInt_AsLong(v);
4257 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004258 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004259 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004260 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004261 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004262 if (x < 0 && type == 'u') {
4263 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004264 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004265 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4266 sign = "-";
4267 else
4268 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004269 if (prec < 0)
4270 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004271
4272 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004273 (type == 'x' || type == 'X' || type == 'o')) {
4274 /* When converting under %#o, %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004275 * of issues that cause pain:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004276 * - for %#o, we want a different base marker than C
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004277 * - when 0 is being converted, the C standard leaves off
4278 * the '0x' or '0X', which is inconsistent with other
4279 * %#x/%#X conversions and inconsistent with Python's
4280 * hex() function
4281 * - there are platforms that violate the standard and
4282 * convert 0 with the '0x' or '0X'
4283 * (Metrowerks, Compaq Tru64)
4284 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004285 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004286 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004287 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004288 * We can achieve the desired consistency by inserting our
4289 * own '0x' or '0X' prefix, and substituting %x/%X in place
4290 * of %#x/%#X.
4291 *
4292 * Note that this is the same approach as used in
4293 * formatint() in unicodeobject.c
4294 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004295 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4296 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004297 }
4298 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004299 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4300 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004301 prec, type);
4302 }
4303
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004304 /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004305 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004306 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004307 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004308 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004309 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004310 return -1;
4311 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004312 if (sign[0])
4313 PyOS_snprintf(buf, buflen, fmt, -x);
4314 else
4315 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004316 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004317}
4318
Thomas Wouters477c8d52006-05-27 19:21:47 +00004319Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004320formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004321{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004322 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004323 if (PyString_Check(v)) {
4324 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004325 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004326 }
4327 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004328 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004329 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004330 }
4331 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004332 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004333}
4334
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  The float and int
   formatting routines do bounds checking to ensure no overflow (the char
   routine simply presumes room for two bytes), but a better solution may
   be to malloc a buffer of appropriate size for each format.  For now,
   the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. under sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004344
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004345PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004346PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004347{
4348 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004349 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004350 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004351 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004352 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004353 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004354 PyObject *dict = NULL;
4355 if (format == NULL || !PyString_Check(format) || args == NULL) {
4356 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004357 return NULL;
4358 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004359 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004360 fmt = PyString_AS_STRING(format);
4361 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004362 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004363 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004364 if (result == NULL)
4365 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004366 res = PyString_AsString(result);
4367 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004368 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004369 argidx = 0;
4370 }
4371 else {
4372 arglen = -1;
4373 argidx = -2;
4374 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004375 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004376 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004377 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004378 while (--fmtcnt >= 0) {
4379 if (*fmt != '%') {
4380 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004381 rescnt = fmtcnt + 100;
4382 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004383 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004384 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004385 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004386 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004387 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004388 }
4389 *res++ = *fmt++;
4390 }
4391 else {
4392 /* Got a format specifier */
4393 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004394 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004395 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004396 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004397 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004398 PyObject *v = NULL;
4399 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004400 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004401 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004402 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004403 char formatbuf[FORMATBUFLEN];
4404 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00004405 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004406 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004407
Guido van Rossumda9c2711996-12-05 21:58:58 +00004408 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004409 if (*fmt == '(') {
4410 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004411 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004412 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004413 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004414
4415 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004416 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004417 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004418 goto error;
4419 }
4420 ++fmt;
4421 --fmtcnt;
4422 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004423 /* Skip over balanced parentheses */
4424 while (pcount > 0 && --fmtcnt >= 0) {
4425 if (*fmt == ')')
4426 --pcount;
4427 else if (*fmt == '(')
4428 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004429 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004430 }
4431 keylen = fmt - keystart - 1;
4432 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004433 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004434 "incomplete format key");
4435 goto error;
4436 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004437 key = PyString_FromStringAndSize(keystart,
4438 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004439 if (key == NULL)
4440 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004441 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004442 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004443 args_owned = 0;
4444 }
4445 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004446 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004447 if (args == NULL) {
4448 goto error;
4449 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004450 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004451 arglen = -1;
4452 argidx = -2;
4453 }
Guido van Rossume5372401993-03-16 12:15:04 +00004454 while (--fmtcnt >= 0) {
4455 switch (c = *fmt++) {
4456 case '-': flags |= F_LJUST; continue;
4457 case '+': flags |= F_SIGN; continue;
4458 case ' ': flags |= F_BLANK; continue;
4459 case '#': flags |= F_ALT; continue;
4460 case '0': flags |= F_ZERO; continue;
4461 }
4462 break;
4463 }
4464 if (c == '*') {
4465 v = getnextarg(args, arglen, &argidx);
4466 if (v == NULL)
4467 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004468 if (!PyInt_Check(v)) {
4469 PyErr_SetString(PyExc_TypeError,
4470 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004471 goto error;
4472 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004473 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004474 if (width == -1 && PyErr_Occurred())
4475 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004476 if (width < 0) {
4477 flags |= F_LJUST;
4478 width = -width;
4479 }
Guido van Rossume5372401993-03-16 12:15:04 +00004480 if (--fmtcnt >= 0)
4481 c = *fmt++;
4482 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004483 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004484 width = c - '0';
4485 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004486 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004487 if (!isdigit(c))
4488 break;
4489 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004490 PyErr_SetString(
4491 PyExc_ValueError,
4492 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004493 goto error;
4494 }
4495 width = width*10 + (c - '0');
4496 }
4497 }
4498 if (c == '.') {
4499 prec = 0;
4500 if (--fmtcnt >= 0)
4501 c = *fmt++;
4502 if (c == '*') {
4503 v = getnextarg(args, arglen, &argidx);
4504 if (v == NULL)
4505 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004506 if (!PyInt_Check(v)) {
4507 PyErr_SetString(
4508 PyExc_TypeError,
4509 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004510 goto error;
4511 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004512 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004513 if (prec == -1 && PyErr_Occurred())
4514 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004515 if (prec < 0)
4516 prec = 0;
4517 if (--fmtcnt >= 0)
4518 c = *fmt++;
4519 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004520 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004521 prec = c - '0';
4522 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004523 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004524 if (!isdigit(c))
4525 break;
4526 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004527 PyErr_SetString(
4528 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004529 "prec too big");
4530 goto error;
4531 }
4532 prec = prec*10 + (c - '0');
4533 }
4534 }
4535 } /* prec */
4536 if (fmtcnt >= 0) {
4537 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004538 if (--fmtcnt >= 0)
4539 c = *fmt++;
4540 }
4541 }
4542 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004543 PyErr_SetString(PyExc_ValueError,
4544 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004545 goto error;
4546 }
4547 if (c != '%') {
4548 v = getnextarg(args, arglen, &argidx);
4549 if (v == NULL)
4550 goto error;
4551 }
4552 sign = 0;
4553 fill = ' ';
4554 switch (c) {
4555 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004556 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004557 len = 1;
4558 break;
4559 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00004560 if (PyUnicode_Check(v)) {
4561 fmt = fmt_start;
4562 argidx = argidx_start;
4563 goto unicode;
4564 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00004565 temp = _PyObject_Str(v);
4566 if (temp != NULL && PyUnicode_Check(temp)) {
4567 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004568 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004569 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004570 goto unicode;
4571 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004572 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004573 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004574 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00004575 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004576 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004577 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004578 if (!PyString_Check(temp)) {
4579 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00004580 "%s argument has non-string str()/repr()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004581 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004582 goto error;
4583 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004584 pbuf = PyString_AS_STRING(temp);
4585 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004586 if (prec >= 0 && len > prec)
4587 len = prec;
4588 break;
4589 case 'i':
4590 case 'd':
4591 case 'u':
4592 case 'o':
4593 case 'x':
4594 case 'X':
4595 if (c == 'i')
4596 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004597 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004598 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004599 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004600 prec, c, &pbuf, &ilen);
4601 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004602 if (!temp)
4603 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004604 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004605 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004606 else {
4607 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004608 len = formatint(pbuf,
4609 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004610 flags, prec, c, v);
4611 if (len < 0)
4612 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004613 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004614 }
4615 if (flags & F_ZERO)
4616 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004617 break;
4618 case 'e':
4619 case 'E':
4620 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004621 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004622 case 'g':
4623 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004624 if (c == 'F')
4625 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004626 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004627 len = formatfloat(pbuf, sizeof(formatbuf),
4628 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004629 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004630 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004631 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004632 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004633 fill = '0';
4634 break;
4635 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004636 if (PyUnicode_Check(v)) {
4637 fmt = fmt_start;
4638 argidx = argidx_start;
4639 goto unicode;
4640 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004641 pbuf = formatbuf;
4642 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004643 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004644 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004645 break;
4646 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004647 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004648 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004649 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004650 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004651 (Py_ssize_t)(fmt - 1 -
4652 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004653 goto error;
4654 }
4655 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004656 if (*pbuf == '-' || *pbuf == '+') {
4657 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004658 len--;
4659 }
4660 else if (flags & F_SIGN)
4661 sign = '+';
4662 else if (flags & F_BLANK)
4663 sign = ' ';
4664 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004665 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004666 }
4667 if (width < len)
4668 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004669 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004670 reslen -= rescnt;
4671 rescnt = width + fmtcnt + 100;
4672 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004673 if (reslen < 0) {
4674 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004675 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004676 return PyErr_NoMemory();
4677 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004678 if (_PyString_Resize(&result, reslen) < 0) {
4679 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004680 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004681 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004682 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004683 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004684 }
4685 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004686 if (fill != ' ')
4687 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004688 rescnt--;
4689 if (width > len)
4690 width--;
4691 }
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004692 if ((flags & F_ALT) &&
4693 (c == 'x' || c == 'X' || c == 'o')) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004694 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004695 assert(pbuf[1] == c);
4696 if (fill != ' ') {
4697 *res++ = *pbuf++;
4698 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004699 }
Tim Petersfff53252001-04-12 18:38:48 +00004700 rescnt -= 2;
4701 width -= 2;
4702 if (width < 0)
4703 width = 0;
4704 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004705 }
4706 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004707 do {
4708 --rescnt;
4709 *res++ = fill;
4710 } while (--width > len);
4711 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004712 if (fill == ' ') {
4713 if (sign)
4714 *res++ = sign;
4715 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004716 (c == 'x' || c == 'X' || c == 'o')) {
Tim Petersfff53252001-04-12 18:38:48 +00004717 assert(pbuf[0] == '0');
4718 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004719 *res++ = *pbuf++;
4720 *res++ = *pbuf++;
4721 }
4722 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004723 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004724 res += len;
4725 rescnt -= len;
4726 while (--width >= len) {
4727 --rescnt;
4728 *res++ = ' ';
4729 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004730 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004731 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004732 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004733 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004734 goto error;
4735 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004736 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004737 } /* '%' */
4738 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004739 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004740 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004741 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004742 goto error;
4743 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004744 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004745 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004746 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004747 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004748 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004749
4750 unicode:
4751 if (args_owned) {
4752 Py_DECREF(args);
4753 args_owned = 0;
4754 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004755 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004756 if (PyTuple_Check(orig_args) && argidx > 0) {
4757 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004758 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004759 v = PyTuple_New(n);
4760 if (v == NULL)
4761 goto error;
4762 while (--n >= 0) {
4763 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4764 Py_INCREF(w);
4765 PyTuple_SET_ITEM(v, n, w);
4766 }
4767 args = v;
4768 } else {
4769 Py_INCREF(orig_args);
4770 args = orig_args;
4771 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004772 args_owned = 1;
4773 /* Take what we have of the result and let the Unicode formatting
4774 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004775 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004776 if (_PyString_Resize(&result, rescnt))
4777 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004778 fmtcnt = PyString_GET_SIZE(format) - \
4779 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004780 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4781 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004782 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004783 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004784 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004785 if (v == NULL)
4786 goto error;
4787 /* Paste what we have (result) to what the Unicode formatting
4788 function returned (v) and return the result (or error) */
4789 w = PyUnicode_Concat(result, v);
4790 Py_DECREF(result);
4791 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004792 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004793 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004794
Guido van Rossume5372401993-03-16 12:15:04 +00004795 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004796 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004797 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004798 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004799 }
Guido van Rossume5372401993-03-16 12:15:04 +00004800 return NULL;
4801}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004802
Guido van Rossum2a61e741997-01-18 07:55:05 +00004803void
Fred Drakeba096332000-07-09 07:04:36 +00004804PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004805{
4806 register PyStringObject *s = (PyStringObject *)(*p);
4807 PyObject *t;
4808 if (s == NULL || !PyString_Check(s))
4809 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004810 /* If it's a string subclass, we don't really know what putting
4811 it in the interned dict might do. */
4812 if (!PyString_CheckExact(s))
4813 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004814 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004815 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004816 if (interned == NULL) {
4817 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004818 if (interned == NULL) {
4819 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004820 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004821 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004822 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004823 t = PyDict_GetItem(interned, (PyObject *)s);
4824 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004825 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004826 Py_DECREF(*p);
4827 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004828 return;
4829 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004830
Armin Rigo79f7ad22004-08-07 19:27:39 +00004831 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004832 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004833 return;
4834 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004835 /* The two references in interned are not counted by refcnt.
4836 The string deallocator will take care of this */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004837 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004838 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004839}
4840
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004841void
4842PyString_InternImmortal(PyObject **p)
4843{
4844 PyString_InternInPlace(p);
4845 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4846 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4847 Py_INCREF(*p);
4848 }
4849}
4850
Guido van Rossum2a61e741997-01-18 07:55:05 +00004851
4852PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004853PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004854{
4855 PyObject *s = PyString_FromString(cp);
4856 if (s == NULL)
4857 return NULL;
4858 PyString_InternInPlace(&s);
4859 return s;
4860}
4861
Guido van Rossum8cf04761997-08-02 02:57:45 +00004862void
Fred Drakeba096332000-07-09 07:04:36 +00004863PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004864{
4865 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004866 for (i = 0; i < UCHAR_MAX + 1; i++) {
4867 Py_XDECREF(characters[i]);
4868 characters[i] = NULL;
4869 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004870 Py_XDECREF(nullstring);
4871 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004872}
Barry Warsawa903ad982001-02-23 16:40:48 +00004873
Barry Warsawa903ad982001-02-23 16:40:48 +00004874void _Py_ReleaseInternedStrings(void)
4875{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004876 PyObject *keys;
4877 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004878 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004879 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004880
4881 if (interned == NULL || !PyDict_Check(interned))
4882 return;
4883 keys = PyDict_Keys(interned);
4884 if (keys == NULL || !PyList_Check(keys)) {
4885 PyErr_Clear();
4886 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004887 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004888
4889 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4890 detector, interned strings are not forcibly deallocated; rather, we
4891 give them their stolen references back, and then clear and DECREF
4892 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004893
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004894 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004895 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4896 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004897 for (i = 0; i < n; i++) {
4898 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4899 switch (s->ob_sstate) {
4900 case SSTATE_NOT_INTERNED:
4901 /* XXX Shouldn't happen */
4902 break;
4903 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004904 Py_Refcnt(s) += 1;
4905 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004906 break;
4907 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004908 Py_Refcnt(s) += 2;
4909 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004910 break;
4911 default:
4912 Py_FatalError("Inconsistent interned string state.");
4913 }
4914 s->ob_sstate = SSTATE_NOT_INTERNED;
4915 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004916 fprintf(stderr, "total size of all interned strings: "
4917 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4918 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004919 Py_DECREF(keys);
4920 PyDict_Clear(interned);
4921 Py_DECREF(interned);
4922 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004923}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004924
4925
4926/*********************** Str Iterator ****************************/
4927
4928typedef struct {
4929 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00004930 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004931 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4932} striterobject;
4933
4934static void
4935striter_dealloc(striterobject *it)
4936{
4937 _PyObject_GC_UNTRACK(it);
4938 Py_XDECREF(it->it_seq);
4939 PyObject_GC_Del(it);
4940}
4941
4942static int
4943striter_traverse(striterobject *it, visitproc visit, void *arg)
4944{
4945 Py_VISIT(it->it_seq);
4946 return 0;
4947}
4948
4949static PyObject *
4950striter_next(striterobject *it)
4951{
4952 PyStringObject *seq;
4953 PyObject *item;
4954
4955 assert(it != NULL);
4956 seq = it->it_seq;
4957 if (seq == NULL)
4958 return NULL;
4959 assert(PyString_Check(seq));
4960
4961 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004962 item = PyString_FromStringAndSize(
4963 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004964 if (item != NULL)
4965 ++it->it_index;
4966 return item;
4967 }
4968
4969 Py_DECREF(seq);
4970 it->it_seq = NULL;
4971 return NULL;
4972}
4973
4974static PyObject *
4975striter_len(striterobject *it)
4976{
4977 Py_ssize_t len = 0;
4978 if (it->it_seq)
4979 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
4980 return PyInt_FromSsize_t(len);
4981}
4982
Guido van Rossum49d6b072006-08-17 21:11:47 +00004983PyDoc_STRVAR(length_hint_doc,
4984 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004985
4986static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004987 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
4988 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004989 {NULL, NULL} /* sentinel */
4990};
4991
4992PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004993 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum49d6b072006-08-17 21:11:47 +00004994 "striterator", /* tp_name */
4995 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004996 0, /* tp_itemsize */
4997 /* methods */
4998 (destructor)striter_dealloc, /* tp_dealloc */
4999 0, /* tp_print */
5000 0, /* tp_getattr */
5001 0, /* tp_setattr */
5002 0, /* tp_compare */
5003 0, /* tp_repr */
5004 0, /* tp_as_number */
5005 0, /* tp_as_sequence */
5006 0, /* tp_as_mapping */
5007 0, /* tp_hash */
5008 0, /* tp_call */
5009 0, /* tp_str */
5010 PyObject_GenericGetAttr, /* tp_getattro */
5011 0, /* tp_setattro */
5012 0, /* tp_as_buffer */
5013 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5014 0, /* tp_doc */
5015 (traverseproc)striter_traverse, /* tp_traverse */
5016 0, /* tp_clear */
5017 0, /* tp_richcompare */
5018 0, /* tp_weaklistoffset */
5019 PyObject_SelfIter, /* tp_iter */
5020 (iternextfunc)striter_next, /* tp_iternext */
5021 striter_methods, /* tp_methods */
5022 0,
5023};
5024
5025static PyObject *
5026str_iter(PyObject *seq)
5027{
5028 striterobject *it;
5029
5030 if (!PyString_Check(seq)) {
5031 PyErr_BadInternalCall();
5032 return NULL;
5033 }
5034 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5035 if (it == NULL)
5036 return NULL;
5037 it->it_index = 0;
5038 Py_INCREF(seq);
5039 it->it_seq = (PyStringObject *)seq;
5040 _PyObject_GC_TRACK(it);
5041 return (PyObject *)it;
5042}