blob: 98fb0bd468d34b2bde4750747a27cc0966357c0c [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
417 v->ob_type->tp_name);
418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
490 v->ob_type->tp_name);
491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
510 op->ob_refcnt = 3;
511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000522 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000609 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 && isxdigit(Py_CHARMASK(s[1]))) {
611 unsigned int x = 0;
612 c = Py_CHARMASK(*s);
613 s++;
614 if (isdigit(c))
615 x = c - '0';
616 else if (islower(c))
617 x = 10 + c - 'a';
618 else
619 x = 10 + c - 'A';
620 x = x << 4;
621 c = Py_CHARMASK(*s);
622 s++;
623 if (isdigit(c))
624 x += c - '0';
625 else if (islower(c))
626 x += 10 + c - 'a';
627 else
628 x += 10 + c - 'A';
629 *p++ = x;
630 break;
631 }
632 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000633 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000635 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000636 }
637 if (strcmp(errors, "replace") == 0) {
638 *p++ = '?';
639 } else if (strcmp(errors, "ignore") == 0)
640 /* do nothing */;
641 else {
642 PyErr_Format(PyExc_ValueError,
643 "decoding error; "
644 "unknown error handling code: %.400s",
645 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000646 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 default:
649 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000650 s--;
651 goto non_esc; /* an arbitry number of unescaped
652 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 }
654 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000655 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 return v;
658 failed:
659 Py_DECREF(v);
660 return NULL;
661}
662
Thomas Wouters477c8d52006-05-27 19:21:47 +0000663/* -------------------------------------------------------------------- */
664/* object api */
665
Martin v. Löwis18e16552006-02-15 17:27:45 +0000666static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667string_getsize(register PyObject *op)
668{
669 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000670 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000671 if (PyString_AsStringAndSize(op, &s, &len))
672 return -1;
673 return len;
674}
675
676static /*const*/ char *
677string_getbuffer(register PyObject *op)
678{
679 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000680 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000681 if (PyString_AsStringAndSize(op, &s, &len))
682 return NULL;
683 return s;
684}
685
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000687PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000688{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000689 if (!PyString_Check(op))
690 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000691 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000692}
693
694/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000695PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697 if (!PyString_Check(op))
698 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000699 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700}
701
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702int
703PyString_AsStringAndSize(register PyObject *obj,
704 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000705 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000706{
707 if (s == NULL) {
708 PyErr_BadInternalCall();
709 return -1;
710 }
711
712 if (!PyString_Check(obj)) {
713 if (PyUnicode_Check(obj)) {
714 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
715 if (obj == NULL)
716 return -1;
717 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000718 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000719 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000720 PyErr_Format(PyExc_TypeError,
Guido van Rossum8d30cc02007-05-03 17:49:24 +0000721 "expected str object, "
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000722 "%.200s found", obj->ob_type->tp_name);
723 return -1;
724 }
725 }
726
727 *s = PyString_AS_STRING(obj);
728 if (len != NULL)
729 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000730 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731 PyErr_SetString(PyExc_TypeError,
732 "expected string without null bytes");
733 return -1;
734 }
735 return 0;
736}
737
Thomas Wouters477c8d52006-05-27 19:21:47 +0000738/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000739/* Methods */
740
Thomas Wouters477c8d52006-05-27 19:21:47 +0000741#define STRINGLIB_CHAR char
742
743#define STRINGLIB_CMP memcmp
744#define STRINGLIB_LEN PyString_GET_SIZE
745#define STRINGLIB_NEW PyString_FromStringAndSize
746#define STRINGLIB_STR PyString_AS_STRING
747
748#define STRINGLIB_EMPTY nullstring
749
750#include "stringlib/fastsearch.h"
751
752#include "stringlib/count.h"
753#include "stringlib/find.h"
754#include "stringlib/partition.h"
755
756
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000757static int
Fred Drakeba096332000-07-09 07:04:36 +0000758string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000759{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000760 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000761 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000762 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000763
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000764 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000765 if (! PyString_CheckExact(op)) {
766 int ret;
767 /* A str subclass may have its own __str__ method. */
768 op = (PyStringObject *) PyObject_Str((PyObject *)op);
769 if (op == NULL)
770 return -1;
771 ret = string_print(op, fp, flags);
772 Py_DECREF(op);
773 return ret;
774 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000775 if (flags & Py_PRINT_RAW) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000776 char *data = op->ob_sval;
777 Py_ssize_t size = op->ob_size;
778 while (size > INT_MAX) {
779 /* Very long strings cannot be written atomically.
780 * But don't write exactly INT_MAX bytes at a time
781 * to avoid memory aligment issues.
782 */
783 const int chunk_size = INT_MAX & ~0x3FFF;
784 fwrite(data, 1, chunk_size, fp);
785 data += chunk_size;
786 size -= chunk_size;
787 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000788#ifdef __VMS
Thomas Wouters89f507f2006-12-13 04:49:30 +0000789 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000790#else
Thomas Wouters89f507f2006-12-13 04:49:30 +0000791 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000792#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000793 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795
Thomas Wouters7e474022000-07-16 12:04:32 +0000796 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000797 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000798 if (memchr(op->ob_sval, '\'', op->ob_size) &&
799 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000800 quote = '"';
801
802 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000803 for (i = 0; i < op->ob_size; i++) {
804 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000805 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000806 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000807 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000808 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000809 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000810 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000811 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000812 fprintf(fp, "\\r");
813 else if (c < ' ' || c >= 0x7f)
814 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000815 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000816 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000818 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000819 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000820}
821
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000822PyObject *
823PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000824{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000825 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000826 register PyStringObject* op = (PyStringObject*) obj;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000827 Py_ssize_t length = PyUnicode_GET_SIZE(op);
Tim Peterse7c05322004-06-27 17:24:49 +0000828 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000829 PyObject *v;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000830 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000831 PyErr_SetString(PyExc_OverflowError,
832 "string is too large to make repr");
833 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000834 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000836 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 }
838 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000839 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000840 register Py_UNICODE c;
841 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000842 int quote;
843
Thomas Wouters7e474022000-07-16 12:04:32 +0000844 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000845 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000846 if (smartquotes) {
847 Py_UNICODE *test;
848 for (test = p; test < p+length; ++test) {
849 if (*test == '"') {
850 quote = '\''; /* switch back to single quote */
851 goto decided;
852 }
853 else if (*test == '\'')
854 quote = '"';
855 }
856 decided:
857 ;
858 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000859
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000860 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000862 /* There's at least enough room for a hex escape
863 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000864 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000866 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000867 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000868 else if (c == '\t')
869 *p++ = '\\', *p++ = 't';
870 else if (c == '\n')
871 *p++ = '\\', *p++ = 'n';
872 else if (c == '\r')
873 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000874 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000875 *p++ = '\\';
876 *p++ = 'x';
877 *p++ = hexdigits[(c & 0xf0) >> 4];
878 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000879 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000880 else
881 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000883 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000884 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000885 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000886 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
887 Py_DECREF(v);
888 return NULL;
889 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000890 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000891 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892}
893
Guido van Rossum189f1df2001-05-01 16:51:53 +0000894static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000895string_repr(PyObject *op)
896{
897 return PyString_Repr(op, 1);
898}
899
900static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000901string_str(PyObject *s)
902{
Tim Petersc9933152001-10-16 20:18:24 +0000903 assert(PyString_Check(s));
904 if (PyString_CheckExact(s)) {
905 Py_INCREF(s);
906 return s;
907 }
908 else {
909 /* Subtype -- return genuine string with the same value. */
910 PyStringObject *t = (PyStringObject *) s;
911 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
912 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000913}
914
Martin v. Löwis18e16552006-02-15 17:27:45 +0000915static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000916string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917{
918 return a->ob_size;
919}
920
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000922string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000924 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925 register PyStringObject *op;
926 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000927 if (PyUnicode_Check(bb))
928 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000929 if (PyBytes_Check(bb))
930 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000931 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000932 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000933 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934 return NULL;
935 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000936#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000938 if ((a->ob_size == 0 || b->ob_size == 0) &&
939 PyString_CheckExact(a) && PyString_CheckExact(b)) {
940 if (a->ob_size == 0) {
941 Py_INCREF(bb);
942 return bb;
943 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944 Py_INCREF(a);
945 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 }
947 size = a->ob_size + b->ob_size;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000948 if (size < 0) {
949 PyErr_SetString(PyExc_OverflowError,
950 "strings are too large to concat");
951 return NULL;
952 }
953
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000954 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000955 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000956 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000958 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000960 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000961 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
962 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000963 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965#undef b
966}
967
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000968static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000969string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000970{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000971 register Py_ssize_t i;
972 register Py_ssize_t j;
973 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000974 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000975 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000976 if (n < 0)
977 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000978 /* watch out for overflows: the size can overflow int,
979 * and the # of bytes needed can overflow size_t
980 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000981 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000982 if (n && size / n != a->ob_size) {
983 PyErr_SetString(PyExc_OverflowError,
984 "repeated string is too long");
985 return NULL;
986 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000987 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988 Py_INCREF(a);
989 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990 }
Tim Peterse7c05322004-06-27 17:24:49 +0000991 nbytes = (size_t)size;
992 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000993 PyErr_SetString(PyExc_OverflowError,
994 "repeated string is too long");
995 return NULL;
996 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000998 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000999 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001000 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001001 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001002 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001003 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001004 op->ob_sval[size] = '\0';
1005 if (a->ob_size == 1 && n > 0) {
1006 memset(op->ob_sval, a->ob_sval[0] , n);
1007 return (PyObject *) op;
1008 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001009 i = 0;
1010 if (i < size) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001011 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001012 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001013 }
1014 while (i < size) {
1015 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001016 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001017 i += j;
1018 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001020}
1021
1022/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1023
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001024static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001025string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001026 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001027 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028{
1029 if (i < 0)
1030 i = 0;
1031 if (j < 0)
1032 j = 0; /* Avoid signed/unsigned bug in next line */
1033 if (j > a->ob_size)
1034 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001035 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1036 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001037 Py_INCREF(a);
1038 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001039 }
1040 if (j < i)
1041 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001042 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001043}
1044
Guido van Rossum9284a572000-03-07 15:53:43 +00001045static int
Thomas Wouters477c8d52006-05-27 19:21:47 +00001046string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001047{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001048 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001049 if (PyUnicode_Check(sub_obj))
1050 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001051 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001052 PyErr_Format(PyExc_TypeError,
1053 "'in <string>' requires string as left operand, "
1054 "not %.200s", sub_obj->ob_type->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001055 return -1;
1056 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001057 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001058
Thomas Wouters477c8d52006-05-27 19:21:47 +00001059 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001060}
1061
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001062static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001063string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001064{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001065 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001066 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001068 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001069 return NULL;
1070 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001071 pchar = a->ob_sval[i];
1072 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001073 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001074 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001075 else {
1076#ifdef COUNT_ALLOCS
1077 one_strings++;
1078#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001079 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001080 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001081 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001082}
1083
Martin v. Löwiscd353062001-05-24 16:56:35 +00001084static PyObject*
1085string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001086{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001087 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001088 Py_ssize_t len_a, len_b;
1089 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001090 PyObject *result;
1091
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001092 /* Make sure both arguments are strings. */
1093 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001094 result = Py_NotImplemented;
1095 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001096 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001097 if (a == b) {
1098 switch (op) {
1099 case Py_EQ:case Py_LE:case Py_GE:
1100 result = Py_True;
1101 goto out;
1102 case Py_NE:case Py_LT:case Py_GT:
1103 result = Py_False;
1104 goto out;
1105 }
1106 }
1107 if (op == Py_EQ) {
1108 /* Supporting Py_NE here as well does not save
1109 much time, since Py_NE is rarely used. */
1110 if (a->ob_size == b->ob_size
1111 && (a->ob_sval[0] == b->ob_sval[0]
Thomas Wouters27d517b2007-02-25 20:39:11 +00001112 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001113 result = Py_True;
1114 } else {
1115 result = Py_False;
1116 }
1117 goto out;
1118 }
1119 len_a = a->ob_size; len_b = b->ob_size;
1120 min_len = (len_a < len_b) ? len_a : len_b;
1121 if (min_len > 0) {
1122 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1123 if (c==0)
1124 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001125 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001126 c = 0;
1127 if (c == 0)
1128 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1129 switch (op) {
1130 case Py_LT: c = c < 0; break;
1131 case Py_LE: c = c <= 0; break;
1132 case Py_EQ: assert(0); break; /* unreachable */
1133 case Py_NE: c = c != 0; break;
1134 case Py_GT: c = c > 0; break;
1135 case Py_GE: c = c >= 0; break;
1136 default:
1137 result = Py_NotImplemented;
1138 goto out;
1139 }
1140 result = c ? Py_True : Py_False;
1141 out:
1142 Py_INCREF(result);
1143 return result;
1144}
1145
1146int
1147_PyString_Eq(PyObject *o1, PyObject *o2)
1148{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001149 PyStringObject *a = (PyStringObject*) o1;
1150 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001151 return a->ob_size == b->ob_size
1152 && *a->ob_sval == *b->ob_sval
1153 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001154}
1155
Guido van Rossum9bfef441993-03-29 10:43:31 +00001156static long
Fred Drakeba096332000-07-09 07:04:36 +00001157string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001158{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001159 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001160 register unsigned char *p;
1161 register long x;
1162
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001163 if (a->ob_shash != -1)
1164 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001165 len = a->ob_size;
1166 p = (unsigned char *) a->ob_sval;
1167 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001168 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001169 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001170 x ^= a->ob_size;
1171 if (x == -1)
1172 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001173 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001174 return x;
1175}
1176
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001177static PyObject*
1178string_subscript(PyStringObject* self, PyObject* item)
1179{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001180 if (PyIndex_Check(item)) {
1181 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001182 if (i == -1 && PyErr_Occurred())
1183 return NULL;
1184 if (i < 0)
1185 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001186 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001187 }
1188 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001189 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001190 char* source_buf;
1191 char* result_buf;
1192 PyObject* result;
1193
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001194 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001195 PyString_GET_SIZE(self),
1196 &start, &stop, &step, &slicelength) < 0) {
1197 return NULL;
1198 }
1199
1200 if (slicelength <= 0) {
1201 return PyString_FromStringAndSize("", 0);
1202 }
1203 else {
1204 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001205 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001206 if (result_buf == NULL)
1207 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001208
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001209 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001210 cur += step, i++) {
1211 result_buf[i] = source_buf[cur];
1212 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001213
1214 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001215 slicelength);
1216 PyMem_Free(result_buf);
1217 return result;
1218 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001219 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001221 PyErr_Format(PyExc_TypeError,
1222 "string indices must be integers, not %.200s",
1223 item->ob_type->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001224 return NULL;
1225 }
1226}
1227
Martin v. Löwis18e16552006-02-15 17:27:45 +00001228static Py_ssize_t
1229string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001230{
1231 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001232 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001233 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001234 return -1;
1235 }
1236 *ptr = (void *)self->ob_sval;
1237 return self->ob_size;
1238}
1239
Martin v. Löwis18e16552006-02-15 17:27:45 +00001240static Py_ssize_t
1241string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001242{
Guido van Rossum045e6881997-09-08 18:30:11 +00001243 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001244 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001245 return -1;
1246}
1247
Martin v. Löwis18e16552006-02-15 17:27:45 +00001248static Py_ssize_t
1249string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001250{
1251 if ( lenp )
1252 *lenp = self->ob_size;
1253 return 1;
1254}
1255
Martin v. Löwis18e16552006-02-15 17:27:45 +00001256static Py_ssize_t
1257string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001258{
1259 if ( index != 0 ) {
1260 PyErr_SetString(PyExc_SystemError,
1261 "accessing non-existent string segment");
1262 return -1;
1263 }
1264 *ptr = self->ob_sval;
1265 return self->ob_size;
1266}
1267
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001268static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001269 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001270 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001271 (ssizeargfunc)string_repeat, /*sq_repeat*/
1272 (ssizeargfunc)string_item, /*sq_item*/
1273 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001274 0, /*sq_ass_item*/
1275 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001276 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001277};
1278
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001279static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001280 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001281 (binaryfunc)string_subscript,
1282 0,
1283};
1284
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001285static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001286 (readbufferproc)string_buffer_getreadbuf,
1287 (writebufferproc)string_buffer_getwritebuf,
1288 (segcountproc)string_buffer_getsegcount,
1289 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001290};
1291
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292
1293
/* Strip kinds; also used as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip kind i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)]+3)


/* True if `length` bytes at target+offset equal pattern.  Compares the
   first and last bytes before calling memcmp on the middle.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length) \
  ((target)[(offset)] == (pattern)[0] && \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits, capped at
   MAX_PREALLOC.  5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to `list` as a new string.
   Requires locals `str` and `list` and an `onError` label in the caller.
   NOTE: expands to several statements, so it must not be used as the
   unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as element `count` of `list` and bump `count`.
   The first MAX_PREALLOC elements go straight into the preallocated
   slots; later ones are appended.  Requires locals `str`, `list`,
   `count` and an `onError` label in the caller. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count

/* Cursor helpers: `i` must be a modifiable index variable.  The forward
   forms stop at `len`; the reverse forms stop when `i` drops below 0. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1360
1361Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001362split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001363{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001364 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001365 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001366 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367
1368 if (list == NULL)
1369 return NULL;
1370
Thomas Wouters477c8d52006-05-27 19:21:47 +00001371 i = j = 0;
1372
1373 while (maxsplit-- > 0) {
1374 SKIP_SPACE(s, i, len);
1375 if (i==len) break;
1376 j = i; i++;
1377 SKIP_NONSPACE(s, i, len);
1378 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001380
1381 if (i < len) {
1382 /* Only occurs when maxsplit was reached */
1383 /* Skip any remaining whitespace and copy to end of string */
1384 SKIP_SPACE(s, i, len);
1385 if (i != len)
1386 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001387 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001388 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001390 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 Py_DECREF(list);
1392 return NULL;
1393}
1394
Thomas Wouters477c8d52006-05-27 19:21:47 +00001395Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001396split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001397{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001398 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001399 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001400 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001401
1402 if (list == NULL)
1403 return NULL;
1404
Thomas Wouters477c8d52006-05-27 19:21:47 +00001405 i = j = 0;
1406 while ((j < len) && (maxcount-- > 0)) {
1407 for(; j<len; j++) {
1408 /* I found that using memchr makes no difference */
1409 if (s[j] == ch) {
1410 SPLIT_ADD(s, i, j);
1411 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001412 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001413 }
1414 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001415 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001416 if (i <= len) {
1417 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001418 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001419 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001420 return list;
1421
1422 onError:
1423 Py_DECREF(list);
1424 return NULL;
1425}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001427PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428"S.split([sep [,maxsplit]]) -> list of strings\n\
1429\n\
1430Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001431delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001432splits are done. If sep is not specified or is None, any\n\
1433whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001434
1435static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001436string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001438 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001439 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001440 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001441 PyObject *list, *str, *subobj = Py_None;
1442#ifdef USE_FAST
1443 Py_ssize_t pos;
1444#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001445
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001446 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001448 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001449 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001450 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001451 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001452 if (PyString_Check(subobj)) {
1453 sub = PyString_AS_STRING(subobj);
1454 n = PyString_GET_SIZE(subobj);
1455 }
1456 else if (PyUnicode_Check(subobj))
1457 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1458 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1459 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001460
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461 if (n == 0) {
1462 PyErr_SetString(PyExc_ValueError, "empty separator");
1463 return NULL;
1464 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001465 else if (n == 1)
1466 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001467
Thomas Wouters477c8d52006-05-27 19:21:47 +00001468 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469 if (list == NULL)
1470 return NULL;
1471
Thomas Wouters477c8d52006-05-27 19:21:47 +00001472#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001473 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001474 while (maxsplit-- > 0) {
1475 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1476 if (pos < 0)
1477 break;
1478 j = i+pos;
1479 SPLIT_ADD(s, i, j);
1480 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001482#else
1483 i = j = 0;
1484 while ((j+n <= len) && (maxsplit-- > 0)) {
1485 for (; j+n <= len; j++) {
1486 if (Py_STRING_MATCH(s, j, sub, n)) {
1487 SPLIT_ADD(s, i, j);
1488 i = j = j + n;
1489 break;
1490 }
1491 }
1492 }
1493#endif
1494 SPLIT_ADD(s, i, len);
1495 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496 return list;
1497
Thomas Wouters477c8d52006-05-27 19:21:47 +00001498 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 Py_DECREF(list);
1500 return NULL;
1501}
1502
Thomas Wouters477c8d52006-05-27 19:21:47 +00001503PyDoc_STRVAR(partition__doc__,
1504"S.partition(sep) -> (head, sep, tail)\n\
1505\n\
1506Searches for the separator sep in S, and returns the part before it,\n\
1507the separator itself, and the part after it. If the separator is not\n\
1508found, returns S and two empty strings.");
1509
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001510static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001511string_partition(PyStringObject *self, PyObject *sep_obj)
1512{
1513 const char *sep;
1514 Py_ssize_t sep_len;
1515
1516 if (PyString_Check(sep_obj)) {
1517 sep = PyString_AS_STRING(sep_obj);
1518 sep_len = PyString_GET_SIZE(sep_obj);
1519 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001520 else if (PyUnicode_Check(sep_obj))
1521 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001522 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1523 return NULL;
1524
1525 return stringlib_partition(
1526 (PyObject*) self,
1527 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1528 sep_obj, sep, sep_len
1529 );
1530}
1531
1532PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001533"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001534\n\
1535Searches for the separator sep in S, starting at the end of S, and returns\n\
1536the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001537separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001538
1539static PyObject *
1540string_rpartition(PyStringObject *self, PyObject *sep_obj)
1541{
1542 const char *sep;
1543 Py_ssize_t sep_len;
1544
1545 if (PyString_Check(sep_obj)) {
1546 sep = PyString_AS_STRING(sep_obj);
1547 sep_len = PyString_GET_SIZE(sep_obj);
1548 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001549 else if (PyUnicode_Check(sep_obj))
1550 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001551 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1552 return NULL;
1553
1554 return stringlib_rpartition(
1555 (PyObject*) self,
1556 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1557 sep_obj, sep, sep_len
1558 );
1559}
1560
1561Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001562rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001563{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001564 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001565 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001566 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001567
1568 if (list == NULL)
1569 return NULL;
1570
Thomas Wouters477c8d52006-05-27 19:21:47 +00001571 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001572
Thomas Wouters477c8d52006-05-27 19:21:47 +00001573 while (maxsplit-- > 0) {
1574 RSKIP_SPACE(s, i);
1575 if (i<0) break;
1576 j = i; i--;
1577 RSKIP_NONSPACE(s, i);
1578 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001579 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001580 if (i >= 0) {
1581 /* Only occurs when maxsplit was reached */
1582 /* Skip any remaining whitespace and copy to beginning of string */
1583 RSKIP_SPACE(s, i);
1584 if (i >= 0)
1585 SPLIT_ADD(s, 0, i + 1);
1586
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001587 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001588 FIX_PREALLOC_SIZE(list);
1589 if (PyList_Reverse(list) < 0)
1590 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001591 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001592 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001593 Py_DECREF(list);
1594 return NULL;
1595}
1596
Thomas Wouters477c8d52006-05-27 19:21:47 +00001597Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001598rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001599{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001600 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001601 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001602 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001603
1604 if (list == NULL)
1605 return NULL;
1606
Thomas Wouters477c8d52006-05-27 19:21:47 +00001607 i = j = len - 1;
1608 while ((i >= 0) && (maxcount-- > 0)) {
1609 for (; i >= 0; i--) {
1610 if (s[i] == ch) {
1611 SPLIT_ADD(s, i + 1, j + 1);
1612 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001613 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001614 }
1615 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001616 }
1617 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001618 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001619 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001620 FIX_PREALLOC_SIZE(list);
1621 if (PyList_Reverse(list) < 0)
1622 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001623 return list;
1624
1625 onError:
1626 Py_DECREF(list);
1627 return NULL;
1628}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001629
1630PyDoc_STRVAR(rsplit__doc__,
1631"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1632\n\
1633Return a list of the words in the string S, using sep as the\n\
1634delimiter string, starting at the end of the string and working\n\
1635to the front. If maxsplit is given, at most maxsplit splits are\n\
1636done. If sep is not specified or is None, any whitespace string\n\
1637is a separator.");
1638
1639static PyObject *
1640string_rsplit(PyStringObject *self, PyObject *args)
1641{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001642 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001643 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001644 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001645 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001646
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001647 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001648 return NULL;
1649 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001650 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001651 if (subobj == Py_None)
1652 return rsplit_whitespace(s, len, maxsplit);
1653 if (PyString_Check(subobj)) {
1654 sub = PyString_AS_STRING(subobj);
1655 n = PyString_GET_SIZE(subobj);
1656 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001657 else if (PyUnicode_Check(subobj))
1658 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001659 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1660 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001661
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001662 if (n == 0) {
1663 PyErr_SetString(PyExc_ValueError, "empty separator");
1664 return NULL;
1665 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001666 else if (n == 1)
1667 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001668
Thomas Wouters477c8d52006-05-27 19:21:47 +00001669 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001670 if (list == NULL)
1671 return NULL;
1672
1673 j = len;
1674 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001675
Thomas Wouters477c8d52006-05-27 19:21:47 +00001676 while ( (i >= 0) && (maxsplit-- > 0) ) {
1677 for (; i>=0; i--) {
1678 if (Py_STRING_MATCH(s, i, sub, n)) {
1679 SPLIT_ADD(s, i + n, j);
1680 j = i;
1681 i -= n;
1682 break;
1683 }
1684 }
1685 }
1686 SPLIT_ADD(s, 0, j);
1687 FIX_PREALLOC_SIZE(list);
1688 if (PyList_Reverse(list) < 0)
1689 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001690 return list;
1691
Thomas Wouters477c8d52006-05-27 19:21:47 +00001692onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001693 Py_DECREF(list);
1694 return NULL;
1695}
1696
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001697
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001698PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001699"S.join(sequence) -> string\n\
1700\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001701Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001702sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703
1704static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001705string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706{
1707 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001708 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001709 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001711 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001712 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001713 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001714 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001715
Tim Peters19fe14e2001-01-19 03:03:47 +00001716 seq = PySequence_Fast(orig, "");
1717 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001718 return NULL;
1719 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001720
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001721 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001722 if (seqlen == 0) {
1723 Py_DECREF(seq);
1724 return PyString_FromString("");
1725 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001726 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001727 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001728 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1729 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001730 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001731 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001732 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001734
Raymond Hettinger674f2412004-08-23 23:23:54 +00001735 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001736 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001737 * Do a pre-pass to figure out the total amount of space we'll
1738 * need (sz), see whether any argument is absurd, and defer to
1739 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001740 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001741 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001742 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001743 item = PySequence_Fast_GET_ITEM(seq, i);
1744 if (!PyString_Check(item)){
1745 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001746 /* Defer to Unicode join.
1747 * CAUTION: There's no gurantee that the
1748 * original sequence can be iterated over
1749 * again, so we must pass seq here.
1750 */
1751 PyObject *result;
1752 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001753 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001754 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001755 }
1756 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001757 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001758 " %.80s found",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001759 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001760 Py_DECREF(seq);
1761 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001762 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001763 sz += PyString_GET_SIZE(item);
1764 if (i != 0)
1765 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001766 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001767 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001768 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001769 Py_DECREF(seq);
1770 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001771 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001772 }
1773
1774 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001775 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001776 if (res == NULL) {
1777 Py_DECREF(seq);
1778 return NULL;
1779 }
1780
1781 /* Catenate everything. */
1782 p = PyString_AS_STRING(res);
1783 for (i = 0; i < seqlen; ++i) {
1784 size_t n;
1785 item = PySequence_Fast_GET_ITEM(seq, i);
1786 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001787 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001788 p += n;
1789 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001790 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001791 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001792 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001794
Jeremy Hylton49048292000-07-11 03:28:17 +00001795 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797}
1798
Tim Peters52e155e2001-06-16 05:42:57 +00001799PyObject *
1800_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001801{
Tim Petersa7259592001-06-16 05:11:17 +00001802 assert(sep != NULL && PyString_Check(sep));
1803 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001804 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001805}
1806
Thomas Wouters477c8d52006-05-27 19:21:47 +00001807Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001808string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001809{
1810 if (*end > len)
1811 *end = len;
1812 else if (*end < 0)
1813 *end += len;
1814 if (*end < 0)
1815 *end = 0;
1816 if (*start < 0)
1817 *start += len;
1818 if (*start < 0)
1819 *start = 0;
1820}
1821
Thomas Wouters477c8d52006-05-27 19:21:47 +00001822Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001823string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001825 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001826 const char *sub;
1827 Py_ssize_t sub_len;
1828 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829
Thomas Wouters477c8d52006-05-27 19:21:47 +00001830 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1831 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001832 return -2;
1833 if (PyString_Check(subobj)) {
1834 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001835 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001836 }
1837 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001838 return PyUnicode_Find(
1839 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001840 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001841 /* XXX - the "expected a character buffer object" is pretty
1842 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843 return -2;
1844
Thomas Wouters477c8d52006-05-27 19:21:47 +00001845 if (dir > 0)
1846 return stringlib_find_slice(
1847 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1848 sub, sub_len, start, end);
1849 else
1850 return stringlib_rfind_slice(
1851 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1852 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853}
1854
1855
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001856PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857"S.find(sub [,start [,end]]) -> int\n\
1858\n\
1859Return the lowest index in S where substring sub is found,\n\
1860such that sub is contained within s[start,end]. Optional\n\
1861arguments start and end are interpreted as in slice notation.\n\
1862\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001863Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864
1865static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001866string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001868 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001869 if (result == -2)
1870 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001871 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001872}
1873
1874
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001875PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001876"S.index(sub [,start [,end]]) -> int\n\
1877\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001878Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879
1880static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001881string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001883 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001884 if (result == -2)
1885 return NULL;
1886 if (result == -1) {
1887 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001888 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889 return NULL;
1890 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001891 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892}
1893
1894
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001895PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896"S.rfind(sub [,start [,end]]) -> int\n\
1897\n\
1898Return the highest index in S where substring sub is found,\n\
1899such that sub is contained within s[start,end]. Optional\n\
1900arguments start and end are interpreted as in slice notation.\n\
1901\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001902Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903
1904static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001905string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001907 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 if (result == -2)
1909 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001910 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911}
1912
1913
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001914PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915"S.rindex(sub [,start [,end]]) -> int\n\
1916\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001917Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918
1919static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001920string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001922 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923 if (result == -2)
1924 return NULL;
1925 if (result == -1) {
1926 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001927 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928 return NULL;
1929 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001930 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931}
1932
1933
Thomas Wouters477c8d52006-05-27 19:21:47 +00001934Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001935do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1936{
1937 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001938 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001939 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001940 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1941 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001942
1943 i = 0;
1944 if (striptype != RIGHTSTRIP) {
1945 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1946 i++;
1947 }
1948 }
1949
1950 j = len;
1951 if (striptype != LEFTSTRIP) {
1952 do {
1953 j--;
1954 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1955 j++;
1956 }
1957
1958 if (i == 0 && j == len && PyString_CheckExact(self)) {
1959 Py_INCREF(self);
1960 return (PyObject*)self;
1961 }
1962 else
1963 return PyString_FromStringAndSize(s+i, j-i);
1964}
1965
1966
Thomas Wouters477c8d52006-05-27 19:21:47 +00001967Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001968do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969{
1970 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001971 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973 i = 0;
1974 if (striptype != RIGHTSTRIP) {
1975 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1976 i++;
1977 }
1978 }
1979
1980 j = len;
1981 if (striptype != LEFTSTRIP) {
1982 do {
1983 j--;
1984 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1985 j++;
1986 }
1987
Tim Peters8fa5dd02001-09-12 02:18:30 +00001988 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989 Py_INCREF(self);
1990 return (PyObject*)self;
1991 }
1992 else
1993 return PyString_FromStringAndSize(s+i, j-i);
1994}
1995
1996
Thomas Wouters477c8d52006-05-27 19:21:47 +00001997Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001998do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1999{
2000 PyObject *sep = NULL;
2001
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002002 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002003 return NULL;
2004
2005 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002006 if (PyString_Check(sep))
2007 return do_xstrip(self, striptype, sep);
2008 else if (PyUnicode_Check(sep)) {
2009 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2010 PyObject *res;
2011 if (uniself==NULL)
2012 return NULL;
2013 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2014 striptype, sep);
2015 Py_DECREF(uniself);
2016 return res;
2017 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002018 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002019 "%s arg must be None or str",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002020 STRIPNAME(striptype));
2021 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002022 }
2023
2024 return do_strip(self, striptype);
2025}
2026
2027
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002028PyDoc_STRVAR(strip__doc__,
Guido van Rossum8d30cc02007-05-03 17:49:24 +00002029"S.strip([chars]) -> str\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002030\n\
2031Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002032whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002033If chars is given and not None, remove characters in chars instead.\n\
2034If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035
2036static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002037string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002039 if (PyTuple_GET_SIZE(args) == 0)
2040 return do_strip(self, BOTHSTRIP); /* Common case */
2041 else
2042 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043}
2044
2045
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002046PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum8d30cc02007-05-03 17:49:24 +00002047"S.lstrip([chars]) -> str\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002049Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002050If chars is given and not None, remove characters in chars instead.\n\
2051If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052
2053static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002054string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002055{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002056 if (PyTuple_GET_SIZE(args) == 0)
2057 return do_strip(self, LEFTSTRIP); /* Common case */
2058 else
2059 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002060}
2061
2062
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002063PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum8d30cc02007-05-03 17:49:24 +00002064"S.rstrip([chars]) -> str\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002065\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002066Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002067If chars is given and not None, remove characters in chars instead.\n\
2068If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002069
2070static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002071string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002073 if (PyTuple_GET_SIZE(args) == 0)
2074 return do_strip(self, RIGHTSTRIP); /* Common case */
2075 else
2076 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077}
2078
2079
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002080PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081"S.lower() -> string\n\
2082\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002083Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084
Thomas Wouters477c8d52006-05-27 19:21:47 +00002085/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2086#ifndef _tolower
2087#define _tolower tolower
2088#endif
2089
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002091string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002093 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002094 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002095 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002097 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002098 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002100
2101 s = PyString_AS_STRING(newobj);
2102
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002103 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002104
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002106 int c = Py_CHARMASK(s[i]);
2107 if (isupper(c))
2108 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002110
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002111 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112}
2113
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002114PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002115"S.upper() -> string\n\
2116\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002117Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118
Thomas Wouters477c8d52006-05-27 19:21:47 +00002119#ifndef _toupper
2120#define _toupper toupper
2121#endif
2122
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002124string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002126 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002127 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002128 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002130 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002131 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002133
2134 s = PyString_AS_STRING(newobj);
2135
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002136 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002137
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002139 int c = Py_CHARMASK(s[i]);
2140 if (islower(c))
2141 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002143
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002144 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145}
2146
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002147PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002148"S.title() -> string\n\
2149\n\
2150Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002151characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002152
2153static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002154string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002155{
2156 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002157 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002158 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002159 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002160
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002161 newobj = PyString_FromStringAndSize(NULL, n);
2162 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002163 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002164 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002165 for (i = 0; i < n; i++) {
2166 int c = Py_CHARMASK(*s++);
2167 if (islower(c)) {
2168 if (!previous_is_cased)
2169 c = toupper(c);
2170 previous_is_cased = 1;
2171 } else if (isupper(c)) {
2172 if (previous_is_cased)
2173 c = tolower(c);
2174 previous_is_cased = 1;
2175 } else
2176 previous_is_cased = 0;
2177 *s_new++ = c;
2178 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002179 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002180}
2181
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002182PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183"S.capitalize() -> string\n\
2184\n\
2185Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002186capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187
2188static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002189string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190{
2191 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002192 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002193 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002195 newobj = PyString_FromStringAndSize(NULL, n);
2196 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002198 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199 if (0 < n) {
2200 int c = Py_CHARMASK(*s++);
2201 if (islower(c))
2202 *s_new = toupper(c);
2203 else
2204 *s_new = c;
2205 s_new++;
2206 }
2207 for (i = 1; i < n; i++) {
2208 int c = Py_CHARMASK(*s++);
2209 if (isupper(c))
2210 *s_new = tolower(c);
2211 else
2212 *s_new = c;
2213 s_new++;
2214 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002215 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002216}
2217
2218
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002219PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002220"S.count(sub[, start[, end]]) -> int\n\
2221\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002222Return the number of non-overlapping occurrences of substring sub in\n\
2223string S[start:end]. Optional arguments start and end are interpreted\n\
2224as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225
2226static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002227string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002229 PyObject *sub_obj;
2230 const char *str = PyString_AS_STRING(self), *sub;
2231 Py_ssize_t sub_len;
2232 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233
Thomas Wouters477c8d52006-05-27 19:21:47 +00002234 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2235 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002237
Thomas Wouters477c8d52006-05-27 19:21:47 +00002238 if (PyString_Check(sub_obj)) {
2239 sub = PyString_AS_STRING(sub_obj);
2240 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002242 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002243 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002244 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002245 if (count == -1)
2246 return NULL;
2247 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002248 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002249 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002250 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002251 return NULL;
2252
Thomas Wouters477c8d52006-05-27 19:21:47 +00002253 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002254
Thomas Wouters477c8d52006-05-27 19:21:47 +00002255 return PyInt_FromSsize_t(
2256 stringlib_count(str + start, end - start, sub, sub_len)
2257 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258}
2259
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002260PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261"S.swapcase() -> string\n\
2262\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002263Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002264converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265
2266static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002267string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268{
2269 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002270 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002271 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002272
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002273 newobj = PyString_FromStringAndSize(NULL, n);
2274 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002276 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277 for (i = 0; i < n; i++) {
2278 int c = Py_CHARMASK(*s++);
2279 if (islower(c)) {
2280 *s_new = toupper(c);
2281 }
2282 else if (isupper(c)) {
2283 *s_new = tolower(c);
2284 }
2285 else
2286 *s_new = c;
2287 s_new++;
2288 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002289 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290}
2291
2292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002293PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294"S.translate(table [,deletechars]) -> string\n\
2295\n\
2296Return a copy of the string S, where all characters occurring\n\
2297in the optional argument deletechars are removed, and the\n\
2298remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002299translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300
2301static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002302string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002303{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002305 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002306 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002308 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002309 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310 PyObject *result;
2311 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002312 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002313
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002314 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002315 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317
2318 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002319 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002320 tablen = PyString_GET_SIZE(tableobj);
2321 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002322 else if (tableobj == Py_None) {
2323 table = NULL;
2324 tablen = 256;
2325 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002327 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328 parameter; instead a mapping to None will cause characters
2329 to be deleted. */
2330 if (delobj != NULL) {
2331 PyErr_SetString(PyExc_TypeError,
2332 "deletions are implemented differently for unicode");
2333 return NULL;
2334 }
2335 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2336 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002337 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339
Martin v. Löwis00b61272002-12-12 20:03:19 +00002340 if (tablen != 256) {
2341 PyErr_SetString(PyExc_ValueError,
2342 "translation table must be 256 characters long");
2343 return NULL;
2344 }
2345
Guido van Rossum4c08d552000-03-10 22:55:18 +00002346 if (delobj != NULL) {
2347 if (PyString_Check(delobj)) {
2348 del_table = PyString_AS_STRING(delobj);
2349 dellen = PyString_GET_SIZE(delobj);
2350 }
2351 else if (PyUnicode_Check(delobj)) {
2352 PyErr_SetString(PyExc_TypeError,
2353 "deletions are implemented differently for unicode");
2354 return NULL;
2355 }
2356 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2357 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002358 }
2359 else {
2360 del_table = NULL;
2361 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362 }
2363
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002364 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365 result = PyString_FromStringAndSize((char *)NULL, inlen);
2366 if (result == NULL)
2367 return NULL;
2368 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002369 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370
Guido van Rossumd8faa362007-04-27 19:54:29 +00002371 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 /* If no deletions are required, use faster code */
2373 for (i = inlen; --i >= 0; ) {
2374 c = Py_CHARMASK(*input++);
2375 if (Py_CHARMASK((*output++ = table[c])) != c)
2376 changed = 1;
2377 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002378 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379 return result;
2380 Py_DECREF(result);
2381 Py_INCREF(input_obj);
2382 return input_obj;
2383 }
2384
Guido van Rossumd8faa362007-04-27 19:54:29 +00002385 if (table == NULL) {
2386 for (i = 0; i < 256; i++)
2387 trans_table[i] = Py_CHARMASK(i);
2388 } else {
2389 for (i = 0; i < 256; i++)
2390 trans_table[i] = Py_CHARMASK(table[i]);
2391 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392
2393 for (i = 0; i < dellen; i++)
2394 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2395
2396 for (i = inlen; --i >= 0; ) {
2397 c = Py_CHARMASK(*input++);
2398 if (trans_table[c] != -1)
2399 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2400 continue;
2401 changed = 1;
2402 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002403 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404 Py_DECREF(result);
2405 Py_INCREF(input_obj);
2406 return input_obj;
2407 }
2408 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002409 if (inlen > 0)
2410 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411 return result;
2412}
2413
2414
/* Search directions understood by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* findchar(target, target_len, c): pointer to the first byte equal to c in
   the target_len bytes starting at target, or NULL when there is none.
   Thin wrapper around memchr(); every argument is parenthesized so that
   expressions can be passed safely. */
#define findchar(target, target_len, c)				\
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422
Thomas Wouters477c8d52006-05-27 19:21:47 +00002423/* String ops must return a string. */
2424/* If the object is subclass of string, create a copy */
2425Py_LOCAL(PyStringObject *)
2426return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002428 if (PyString_CheckExact(self)) {
2429 Py_INCREF(self);
2430 return self;
2431 }
2432 return (PyStringObject *)PyString_FromStringAndSize(
2433 PyString_AS_STRING(self),
2434 PyString_GET_SIZE(self));
2435}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436
Thomas Wouters477c8d52006-05-27 19:21:47 +00002437Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002438countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002439{
2440 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002441 const char *start=target;
2442 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443
Thomas Wouters477c8d52006-05-27 19:21:47 +00002444 while ( (start=findchar(start, end-start, c)) != NULL ) {
2445 count++;
2446 if (count >= maxcount)
2447 break;
2448 start += 1;
2449 }
2450 return count;
2451}
2452
2453Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002454findstring(const char *target, Py_ssize_t target_len,
2455 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002456 Py_ssize_t start,
2457 Py_ssize_t end,
2458 int direction)
2459{
2460 if (start < 0) {
2461 start += target_len;
2462 if (start < 0)
2463 start = 0;
2464 }
2465 if (end > target_len) {
2466 end = target_len;
2467 } else if (end < 0) {
2468 end += target_len;
2469 if (end < 0)
2470 end = 0;
2471 }
2472
2473 /* zero-length substrings always match at the first attempt */
2474 if (pattern_len == 0)
2475 return (direction > 0) ? start : end;
2476
2477 end -= pattern_len;
2478
2479 if (direction < 0) {
2480 for (; end >= start; end--)
2481 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2482 return end;
2483 } else {
2484 for (; start <= end; start++)
2485 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2486 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002487 }
2488 return -1;
2489}
2490
Thomas Wouters477c8d52006-05-27 19:21:47 +00002491Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002492countstring(const char *target, Py_ssize_t target_len,
2493 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002494 Py_ssize_t start,
2495 Py_ssize_t end,
2496 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002497{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002498 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002499
Thomas Wouters477c8d52006-05-27 19:21:47 +00002500 if (start < 0) {
2501 start += target_len;
2502 if (start < 0)
2503 start = 0;
2504 }
2505 if (end > target_len) {
2506 end = target_len;
2507 } else if (end < 0) {
2508 end += target_len;
2509 if (end < 0)
2510 end = 0;
2511 }
2512
2513 /* zero-length substrings match everywhere */
2514 if (pattern_len == 0 || maxcount == 0) {
2515 if (target_len+1 < maxcount)
2516 return target_len+1;
2517 return maxcount;
2518 }
2519
2520 end -= pattern_len;
2521 if (direction < 0) {
2522 for (; (end >= start); end--)
2523 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2524 count++;
2525 if (--maxcount <= 0) break;
2526 end -= pattern_len-1;
2527 }
2528 } else {
2529 for (; (start <= end); start++)
2530 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2531 count++;
2532 if (--maxcount <= 0)
2533 break;
2534 start += pattern_len-1;
2535 }
2536 }
2537 return count;
2538}
2539
2540
2541/* Algorithms for different cases of string replacement */
2542
2543/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2544Py_LOCAL(PyStringObject *)
2545replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002546 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002547 Py_ssize_t maxcount)
2548{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002549 char *self_s, *result_s;
2550 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002551 Py_ssize_t count, i, product;
2552 PyStringObject *result;
2553
2554 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002555
Thomas Wouters477c8d52006-05-27 19:21:47 +00002556 /* 1 at the end plus 1 after every character */
2557 count = self_len+1;
2558 if (maxcount < count)
2559 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002560
Thomas Wouters477c8d52006-05-27 19:21:47 +00002561 /* Check for overflow */
2562 /* result_len = count * to_len + self_len; */
2563 product = count * to_len;
2564 if (product / to_len != count) {
2565 PyErr_SetString(PyExc_OverflowError,
2566 "replace string is too long");
2567 return NULL;
2568 }
2569 result_len = product + self_len;
2570 if (result_len < 0) {
2571 PyErr_SetString(PyExc_OverflowError,
2572 "replace string is too long");
2573 return NULL;
2574 }
2575
2576 if (! (result = (PyStringObject *)
2577 PyString_FromStringAndSize(NULL, result_len)) )
2578 return NULL;
2579
2580 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002581 result_s = PyString_AS_STRING(result);
2582
2583 /* TODO: special case single character, which doesn't need memcpy */
2584
2585 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002586 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002587 result_s += to_len;
2588 count -= 1;
2589
2590 for (i=0; i<count; i++) {
2591 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002592 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002593 result_s += to_len;
2594 }
2595
2596 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002597 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002598
2599 return result;
2600}
2601
2602/* Special case for deleting a single character */
2603/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2604Py_LOCAL(PyStringObject *)
2605replace_delete_single_character(PyStringObject *self,
2606 char from_c, Py_ssize_t maxcount)
2607{
2608 char *self_s, *result_s;
2609 char *start, *next, *end;
2610 Py_ssize_t self_len, result_len;
2611 Py_ssize_t count;
2612 PyStringObject *result;
2613
2614 self_len = PyString_GET_SIZE(self);
2615 self_s = PyString_AS_STRING(self);
2616
2617 count = countchar(self_s, self_len, from_c, maxcount);
2618 if (count == 0) {
2619 return return_self(self);
2620 }
2621
2622 result_len = self_len - count; /* from_len == 1 */
2623 assert(result_len>=0);
2624
2625 if ( (result = (PyStringObject *)
2626 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2627 return NULL;
2628 result_s = PyString_AS_STRING(result);
2629
2630 start = self_s;
2631 end = self_s + self_len;
2632 while (count-- > 0) {
2633 next = findchar(start, end-start, from_c);
2634 if (next == NULL)
2635 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002636 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002637 result_s += (next-start);
2638 start = next+1;
2639 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002640 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002641
Thomas Wouters477c8d52006-05-27 19:21:47 +00002642 return result;
2643}
2644
2645/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2646
2647Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002648replace_delete_substring(PyStringObject *self,
2649 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002650 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002651 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002652 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002653 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002654 Py_ssize_t count, offset;
2655 PyStringObject *result;
2656
2657 self_len = PyString_GET_SIZE(self);
2658 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002659
2660 count = countstring(self_s, self_len,
2661 from_s, from_len,
2662 0, self_len, 1,
2663 maxcount);
2664
2665 if (count == 0) {
2666 /* no matches */
2667 return return_self(self);
2668 }
2669
2670 result_len = self_len - (count * from_len);
2671 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002672
Thomas Wouters477c8d52006-05-27 19:21:47 +00002673 if ( (result = (PyStringObject *)
2674 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2675 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002676
Thomas Wouters477c8d52006-05-27 19:21:47 +00002677 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002678
Thomas Wouters477c8d52006-05-27 19:21:47 +00002679 start = self_s;
2680 end = self_s + self_len;
2681 while (count-- > 0) {
2682 offset = findstring(start, end-start,
2683 from_s, from_len,
2684 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002685 if (offset == -1)
2686 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002687 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002688
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002689 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002690
Thomas Wouters477c8d52006-05-27 19:21:47 +00002691 result_s += (next-start);
2692 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002693 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002694 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002695 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002696}
2697
Thomas Wouters477c8d52006-05-27 19:21:47 +00002698/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2699Py_LOCAL(PyStringObject *)
2700replace_single_character_in_place(PyStringObject *self,
2701 char from_c, char to_c,
2702 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002703{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002704 char *self_s, *result_s, *start, *end, *next;
2705 Py_ssize_t self_len;
2706 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002707
Thomas Wouters477c8d52006-05-27 19:21:47 +00002708 /* The result string will be the same size */
2709 self_s = PyString_AS_STRING(self);
2710 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002711
Thomas Wouters477c8d52006-05-27 19:21:47 +00002712 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002713
Thomas Wouters477c8d52006-05-27 19:21:47 +00002714 if (next == NULL) {
2715 /* No matches; return the original string */
2716 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002717 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002718
Thomas Wouters477c8d52006-05-27 19:21:47 +00002719 /* Need to make a new string */
2720 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2721 if (result == NULL)
2722 return NULL;
2723 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002724 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002725
Thomas Wouters477c8d52006-05-27 19:21:47 +00002726 /* change everything in-place, starting with this one */
2727 start = result_s + (next-self_s);
2728 *start = to_c;
2729 start++;
2730 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002731
Thomas Wouters477c8d52006-05-27 19:21:47 +00002732 while (--maxcount > 0) {
2733 next = findchar(start, end-start, from_c);
2734 if (next == NULL)
2735 break;
2736 *next = to_c;
2737 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002738 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002739
Thomas Wouters477c8d52006-05-27 19:21:47 +00002740 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002741}
2742
Thomas Wouters477c8d52006-05-27 19:21:47 +00002743/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2744Py_LOCAL(PyStringObject *)
2745replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002746 const char *from_s, Py_ssize_t from_len,
2747 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002748 Py_ssize_t maxcount)
2749{
2750 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002751 char *self_s;
2752 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002753 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002754
Thomas Wouters477c8d52006-05-27 19:21:47 +00002755 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002756
Thomas Wouters477c8d52006-05-27 19:21:47 +00002757 self_s = PyString_AS_STRING(self);
2758 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002759
Thomas Wouters477c8d52006-05-27 19:21:47 +00002760 offset = findstring(self_s, self_len,
2761 from_s, from_len,
2762 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002763 if (offset == -1) {
2764 /* No matches; return the original string */
2765 return return_self(self);
2766 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002767
Thomas Wouters477c8d52006-05-27 19:21:47 +00002768 /* Need to make a new string */
2769 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2770 if (result == NULL)
2771 return NULL;
2772 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002773 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002774
Thomas Wouters477c8d52006-05-27 19:21:47 +00002775 /* change everything in-place, starting with this one */
2776 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002777 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002778 start += from_len;
2779 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002780
Thomas Wouters477c8d52006-05-27 19:21:47 +00002781 while ( --maxcount > 0) {
2782 offset = findstring(start, end-start,
2783 from_s, from_len,
2784 0, end-start, FORWARD);
2785 if (offset==-1)
2786 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002787 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002788 start += offset+from_len;
2789 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002790
Thomas Wouters477c8d52006-05-27 19:21:47 +00002791 return result;
2792}
2793
2794/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2795Py_LOCAL(PyStringObject *)
2796replace_single_character(PyStringObject *self,
2797 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002798 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002799 Py_ssize_t maxcount)
2800{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002801 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002802 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002803 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002804 Py_ssize_t count, product;
2805 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002806
Thomas Wouters477c8d52006-05-27 19:21:47 +00002807 self_s = PyString_AS_STRING(self);
2808 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002809
Thomas Wouters477c8d52006-05-27 19:21:47 +00002810 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002811 if (count == 0) {
2812 /* no matches, return unchanged */
2813 return return_self(self);
2814 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002815
Thomas Wouters477c8d52006-05-27 19:21:47 +00002816 /* use the difference between current and new, hence the "-1" */
2817 /* result_len = self_len + count * (to_len-1) */
2818 product = count * (to_len-1);
2819 if (product / (to_len-1) != count) {
2820 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2821 return NULL;
2822 }
2823 result_len = self_len + product;
2824 if (result_len < 0) {
2825 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2826 return NULL;
2827 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002828
Thomas Wouters477c8d52006-05-27 19:21:47 +00002829 if ( (result = (PyStringObject *)
2830 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2831 return NULL;
2832 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002833
Thomas Wouters477c8d52006-05-27 19:21:47 +00002834 start = self_s;
2835 end = self_s + self_len;
2836 while (count-- > 0) {
2837 next = findchar(start, end-start, from_c);
2838 if (next == NULL)
2839 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002840
Thomas Wouters477c8d52006-05-27 19:21:47 +00002841 if (next == start) {
2842 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002843 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002844 result_s += to_len;
2845 start += 1;
2846 } else {
2847 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002848 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002849 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002850 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002851 result_s += to_len;
2852 start = next+1;
2853 }
2854 }
2855 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002856 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002857
Thomas Wouters477c8d52006-05-27 19:21:47 +00002858 return result;
2859}
2860
2861/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2862Py_LOCAL(PyStringObject *)
2863replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002864 const char *from_s, Py_ssize_t from_len,
2865 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002866 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002867 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002868 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002869 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002870 Py_ssize_t count, offset, product;
2871 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002872
Thomas Wouters477c8d52006-05-27 19:21:47 +00002873 self_s = PyString_AS_STRING(self);
2874 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002875
Thomas Wouters477c8d52006-05-27 19:21:47 +00002876 count = countstring(self_s, self_len,
2877 from_s, from_len,
2878 0, self_len, FORWARD, maxcount);
2879 if (count == 0) {
2880 /* no matches, return unchanged */
2881 return return_self(self);
2882 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002883
Thomas Wouters477c8d52006-05-27 19:21:47 +00002884 /* Check for overflow */
2885 /* result_len = self_len + count * (to_len-from_len) */
2886 product = count * (to_len-from_len);
2887 if (product / (to_len-from_len) != count) {
2888 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2889 return NULL;
2890 }
2891 result_len = self_len + product;
2892 if (result_len < 0) {
2893 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2894 return NULL;
2895 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002896
Thomas Wouters477c8d52006-05-27 19:21:47 +00002897 if ( (result = (PyStringObject *)
2898 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2899 return NULL;
2900 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002901
Thomas Wouters477c8d52006-05-27 19:21:47 +00002902 start = self_s;
2903 end = self_s + self_len;
2904 while (count-- > 0) {
2905 offset = findstring(start, end-start,
2906 from_s, from_len,
2907 0, end-start, FORWARD);
2908 if (offset == -1)
2909 break;
2910 next = start+offset;
2911 if (next == start) {
2912 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002913 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002914 result_s += to_len;
2915 start += from_len;
2916 } else {
2917 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002918 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002919 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002920 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002921 result_s += to_len;
2922 start = next+from_len;
2923 }
2924 }
2925 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002926 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002927
Thomas Wouters477c8d52006-05-27 19:21:47 +00002928 return result;
2929}
2930
2931
2932Py_LOCAL(PyStringObject *)
2933replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002934 const char *from_s, Py_ssize_t from_len,
2935 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002936 Py_ssize_t maxcount)
2937{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002938 if (maxcount < 0) {
2939 maxcount = PY_SSIZE_T_MAX;
2940 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2941 /* nothing to do; return the original string */
2942 return return_self(self);
2943 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002944
Thomas Wouters477c8d52006-05-27 19:21:47 +00002945 if (maxcount == 0 ||
2946 (from_len == 0 && to_len == 0)) {
2947 /* nothing to do; return the original string */
2948 return return_self(self);
2949 }
2950
2951 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002952
Thomas Wouters477c8d52006-05-27 19:21:47 +00002953 if (from_len == 0) {
2954 /* insert the 'to' string everywhere. */
2955 /* >>> "Python".replace("", ".") */
2956 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002957 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002958 }
2959
2960 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2961 /* point for an empty self string to generate a non-empty string */
2962 /* Special case so the remaining code always gets a non-empty string */
2963 if (PyString_GET_SIZE(self) == 0) {
2964 return return_self(self);
2965 }
2966
2967 if (to_len == 0) {
2968 /* delete all occurances of 'from' string */
2969 if (from_len == 1) {
2970 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002971 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002972 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002973 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002974 }
2975 }
2976
2977 /* Handle special case where both strings have the same length */
2978
2979 if (from_len == to_len) {
2980 if (from_len == 1) {
2981 return replace_single_character_in_place(
2982 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002983 from_s[0],
2984 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002985 maxcount);
2986 } else {
2987 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002988 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002989 }
2990 }
2991
2992 /* Otherwise use the more generic algorithms */
2993 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002994 return replace_single_character(self, from_s[0],
2995 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002996 } else {
2997 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002998 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002999 }
3000}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003001
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003002PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003003"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003004\n\
3005Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003006old replaced by new. If the optional argument count is\n\
3007given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003008
3009static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003010string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003011{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003012 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003013 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003014 const char *from_s, *to_s;
3015 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003016
Thomas Wouters477c8d52006-05-27 19:21:47 +00003017 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003018 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003019
Thomas Wouters477c8d52006-05-27 19:21:47 +00003020 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003021 from_s = PyString_AS_STRING(from);
3022 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003023 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003024 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003025 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003026 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003027 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003028 return NULL;
3029
Thomas Wouters477c8d52006-05-27 19:21:47 +00003030 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003031 to_s = PyString_AS_STRING(to);
3032 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003033 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003034 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003035 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003036 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003037 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003038 return NULL;
3039
Thomas Wouters477c8d52006-05-27 19:21:47 +00003040 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003041 from_s, from_len,
3042 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003043}
3044
Thomas Wouters477c8d52006-05-27 19:21:47 +00003045/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003046
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003047/* Matches the end (direction >= 0) or start (direction < 0) of self
3048 * against substr, using the start and end arguments. Returns
3049 * -1 on error, 0 if not found and 1 if found.
3050 */
3051Py_LOCAL(int)
3052_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3053 Py_ssize_t end, int direction)
3054{
3055 Py_ssize_t len = PyString_GET_SIZE(self);
3056 Py_ssize_t slen;
3057 const char* sub;
3058 const char* str;
3059
3060 if (PyString_Check(substr)) {
3061 sub = PyString_AS_STRING(substr);
3062 slen = PyString_GET_SIZE(substr);
3063 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003064 else if (PyUnicode_Check(substr))
3065 return PyUnicode_Tailmatch((PyObject *)self,
3066 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003067 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3068 return -1;
3069 str = PyString_AS_STRING(self);
3070
3071 string_adjust_indices(&start, &end, len);
3072
3073 if (direction < 0) {
3074 /* startswith */
3075 if (start+slen > len)
3076 return 0;
3077 } else {
3078 /* endswith */
3079 if (end-start < slen || start > len)
3080 return 0;
3081
3082 if (end-slen > start)
3083 start = end - slen;
3084 }
3085 if (end-start >= slen)
3086 return ! memcmp(str+start, sub, slen);
3087 return 0;
3088}
3089
3090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003091PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003092"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003093\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003094Return True if S starts with the specified prefix, False otherwise.\n\
3095With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003096With optional end, stop comparing S at that position.\n\
3097prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003098
3099static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003100string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003101{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003102 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003103 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003104 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003105 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003106
Guido van Rossumc6821402000-05-08 14:08:05 +00003107 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3108 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003109 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003110 if (PyTuple_Check(subobj)) {
3111 Py_ssize_t i;
3112 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3113 result = _string_tailmatch(self,
3114 PyTuple_GET_ITEM(subobj, i),
3115 start, end, -1);
3116 if (result == -1)
3117 return NULL;
3118 else if (result) {
3119 Py_RETURN_TRUE;
3120 }
3121 }
3122 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003123 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003124 result = _string_tailmatch(self, subobj, start, end, -1);
3125 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003126 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003127 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003128 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003129}
3130
3131
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003132PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003133"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003134\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003135Return True if S ends with the specified suffix, False otherwise.\n\
3136With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003137With optional end, stop comparing S at that position.\n\
3138suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003139
3140static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003141string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003142{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003143 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003144 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003146 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003147
Guido van Rossumc6821402000-05-08 14:08:05 +00003148 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3149 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003151 if (PyTuple_Check(subobj)) {
3152 Py_ssize_t i;
3153 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3154 result = _string_tailmatch(self,
3155 PyTuple_GET_ITEM(subobj, i),
3156 start, end, +1);
3157 if (result == -1)
3158 return NULL;
3159 else if (result) {
3160 Py_RETURN_TRUE;
3161 }
3162 }
3163 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003164 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003165 result = _string_tailmatch(self, subobj, start, end, +1);
3166 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003167 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003168 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003169 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003170}
3171
3172
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003173PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003174"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003175\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003176Encodes S using the codec registered for encoding. encoding defaults\n\
3177to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003178handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003179a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3180'xmlcharrefreplace' as well as any other name registered with\n\
3181codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003182
3183static PyObject *
3184string_encode(PyStringObject *self, PyObject *args)
3185{
3186 char *encoding = NULL;
3187 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003188 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003189
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003190 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3191 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003192 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003193 if (v == NULL)
3194 goto onError;
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003195 if (!PyBytes_Check(v)) {
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003196 PyErr_Format(PyExc_TypeError,
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003197 "[str8] encoder did not return a bytes object "
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003198 "(type=%.400s)",
3199 v->ob_type->tp_name);
3200 Py_DECREF(v);
3201 return NULL;
3202 }
3203 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003204
3205 onError:
3206 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003207}
3208
3209
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003210PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003211"S.decode([encoding[,errors]]) -> object\n\
3212\n\
3213Decodes S using the codec registered for encoding. encoding defaults\n\
3214to the default encoding. errors may be given to set a different error\n\
3215handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003216a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3217as well as any other name registerd with codecs.register_error that is\n\
3218able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003219
3220static PyObject *
3221string_decode(PyStringObject *self, PyObject *args)
3222{
3223 char *encoding = NULL;
3224 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003225 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003226
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003227 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3228 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003229 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003230 if (v == NULL)
3231 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003232 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3233 PyErr_Format(PyExc_TypeError,
3234 "decoder did not return a string/unicode object "
3235 "(type=%.400s)",
3236 v->ob_type->tp_name);
3237 Py_DECREF(v);
3238 return NULL;
3239 }
3240 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003241
3242 onError:
3243 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003244}
3245
3246
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003247PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003248"S.expandtabs([tabsize]) -> string\n\
3249\n\
3250Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003251If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003252
3253static PyObject*
3254string_expandtabs(PyStringObject *self, PyObject *args)
3255{
3256 const char *e, *p;
3257 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003258 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003259 PyObject *u;
3260 int tabsize = 8;
3261
3262 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3263 return NULL;
3264
Thomas Wouters7e474022000-07-16 12:04:32 +00003265 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003266 i = j = 0;
3267 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3268 for (p = PyString_AS_STRING(self); p < e; p++)
3269 if (*p == '\t') {
3270 if (tabsize > 0)
3271 j += tabsize - (j % tabsize);
3272 }
3273 else {
3274 j++;
3275 if (*p == '\n' || *p == '\r') {
3276 i += j;
3277 j = 0;
3278 }
3279 }
3280
3281 /* Second pass: create output string and fill it */
3282 u = PyString_FromStringAndSize(NULL, i + j);
3283 if (!u)
3284 return NULL;
3285
3286 j = 0;
3287 q = PyString_AS_STRING(u);
3288
3289 for (p = PyString_AS_STRING(self); p < e; p++)
3290 if (*p == '\t') {
3291 if (tabsize > 0) {
3292 i = tabsize - (j % tabsize);
3293 j += i;
3294 while (i--)
3295 *q++ = ' ';
3296 }
3297 }
3298 else {
3299 j++;
3300 *q++ = *p;
3301 if (*p == '\n' || *p == '\r')
3302 j = 0;
3303 }
3304
3305 return u;
3306}
3307
Thomas Wouters477c8d52006-05-27 19:21:47 +00003308Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003309pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003310{
3311 PyObject *u;
3312
3313 if (left < 0)
3314 left = 0;
3315 if (right < 0)
3316 right = 0;
3317
Tim Peters8fa5dd02001-09-12 02:18:30 +00003318 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003319 Py_INCREF(self);
3320 return (PyObject *)self;
3321 }
3322
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003323 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003324 left + PyString_GET_SIZE(self) + right);
3325 if (u) {
3326 if (left)
3327 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003328 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003329 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003330 PyString_GET_SIZE(self));
3331 if (right)
3332 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3333 fill, right);
3334 }
3335
3336 return u;
3337}
3338
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003339PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003340"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003341"\n"
3342"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003343"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003344
3345static PyObject *
3346string_ljust(PyStringObject *self, PyObject *args)
3347{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003348 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003349 char fillchar = ' ';
3350
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003351 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003352 return NULL;
3353
Tim Peters8fa5dd02001-09-12 02:18:30 +00003354 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003355 Py_INCREF(self);
3356 return (PyObject*) self;
3357 }
3358
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003359 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003360}
3361
3362
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003363PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003364"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003365"\n"
3366"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003367"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003368
3369static PyObject *
3370string_rjust(PyStringObject *self, PyObject *args)
3371{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003372 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003373 char fillchar = ' ';
3374
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003375 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003376 return NULL;
3377
Tim Peters8fa5dd02001-09-12 02:18:30 +00003378 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003379 Py_INCREF(self);
3380 return (PyObject*) self;
3381 }
3382
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003383 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003384}
3385
3386
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003387PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003388"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003389"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003390"Return S centered in a string of length width. Padding is\n"
3391"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003392
3393static PyObject *
3394string_center(PyStringObject *self, PyObject *args)
3395{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003396 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003397 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003398 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003399
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003400 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003401 return NULL;
3402
Tim Peters8fa5dd02001-09-12 02:18:30 +00003403 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003404 Py_INCREF(self);
3405 return (PyObject*) self;
3406 }
3407
3408 marg = width - PyString_GET_SIZE(self);
3409 left = marg / 2 + (marg & width & 1);
3410
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003411 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412}
3413
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003414PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003415"S.zfill(width) -> string\n"
3416"\n"
3417"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003418"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003419
3420static PyObject *
3421string_zfill(PyStringObject *self, PyObject *args)
3422{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003423 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003424 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003425 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003426 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003427
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003428 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003429 return NULL;
3430
3431 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003432 if (PyString_CheckExact(self)) {
3433 Py_INCREF(self);
3434 return (PyObject*) self;
3435 }
3436 else
3437 return PyString_FromStringAndSize(
3438 PyString_AS_STRING(self),
3439 PyString_GET_SIZE(self)
3440 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003441 }
3442
3443 fill = width - PyString_GET_SIZE(self);
3444
3445 s = pad(self, fill, 0, '0');
3446
3447 if (s == NULL)
3448 return NULL;
3449
3450 p = PyString_AS_STRING(s);
3451 if (p[fill] == '+' || p[fill] == '-') {
3452 /* move sign to beginning of string */
3453 p[0] = p[fill];
3454 p[fill] = '0';
3455 }
3456
3457 return (PyObject*) s;
3458}
3459
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003460PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003461"S.isspace() -> bool\n\
3462\n\
3463Return True if all characters in S are whitespace\n\
3464and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003465
3466static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003467string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003468{
Fred Drakeba096332000-07-09 07:04:36 +00003469 register const unsigned char *p
3470 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003471 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003472
Guido van Rossum4c08d552000-03-10 22:55:18 +00003473 /* Shortcut for single character strings */
3474 if (PyString_GET_SIZE(self) == 1 &&
3475 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003476 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003477
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003478 /* Special case for empty strings */
3479 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003480 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003481
Guido van Rossum4c08d552000-03-10 22:55:18 +00003482 e = p + PyString_GET_SIZE(self);
3483 for (; p < e; p++) {
3484 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003485 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003486 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003487 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003488}
3489
3490
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003491PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003492"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003493\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003494Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003495and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003496
3497static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003498string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003499{
Fred Drakeba096332000-07-09 07:04:36 +00003500 register const unsigned char *p
3501 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003502 register const unsigned char *e;
3503
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003504 /* Shortcut for single character strings */
3505 if (PyString_GET_SIZE(self) == 1 &&
3506 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003507 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003508
3509 /* Special case for empty strings */
3510 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003511 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003512
3513 e = p + PyString_GET_SIZE(self);
3514 for (; p < e; p++) {
3515 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003516 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003517 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003518 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003519}
3520
3521
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003522PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003523"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003524\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003525Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003526and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003527
3528static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003529string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003530{
Fred Drakeba096332000-07-09 07:04:36 +00003531 register const unsigned char *p
3532 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003533 register const unsigned char *e;
3534
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003535 /* Shortcut for single character strings */
3536 if (PyString_GET_SIZE(self) == 1 &&
3537 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003538 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003539
3540 /* Special case for empty strings */
3541 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003542 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003543
3544 e = p + PyString_GET_SIZE(self);
3545 for (; p < e; p++) {
3546 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003547 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003548 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003549 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003550}
3551
3552
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003553PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003554"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003555\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003556Return True if all characters in S are digits\n\
3557and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003558
3559static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003560string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561{
Fred Drakeba096332000-07-09 07:04:36 +00003562 register const unsigned char *p
3563 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003564 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003565
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566 /* Shortcut for single character strings */
3567 if (PyString_GET_SIZE(self) == 1 &&
3568 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003569 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003570
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003571 /* Special case for empty strings */
3572 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003573 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003574
Guido van Rossum4c08d552000-03-10 22:55:18 +00003575 e = p + PyString_GET_SIZE(self);
3576 for (; p < e; p++) {
3577 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003578 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003579 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003581}
3582
3583
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003584PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003585"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003586\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003587Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003588at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003589
3590static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003591string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003592{
Fred Drakeba096332000-07-09 07:04:36 +00003593 register const unsigned char *p
3594 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003595 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003596 int cased;
3597
Guido van Rossum4c08d552000-03-10 22:55:18 +00003598 /* Shortcut for single character strings */
3599 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003600 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003601
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003602 /* Special case for empty strings */
3603 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003604 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003605
Guido van Rossum4c08d552000-03-10 22:55:18 +00003606 e = p + PyString_GET_SIZE(self);
3607 cased = 0;
3608 for (; p < e; p++) {
3609 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003610 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003611 else if (!cased && islower(*p))
3612 cased = 1;
3613 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003614 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003615}
3616
3617
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003618PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003620\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003621Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003622at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003623
3624static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003625string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003626{
Fred Drakeba096332000-07-09 07:04:36 +00003627 register const unsigned char *p
3628 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003629 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003630 int cased;
3631
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632 /* Shortcut for single character strings */
3633 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003634 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003636 /* Special case for empty strings */
3637 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003638 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003639
Guido van Rossum4c08d552000-03-10 22:55:18 +00003640 e = p + PyString_GET_SIZE(self);
3641 cased = 0;
3642 for (; p < e; p++) {
3643 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003645 else if (!cased && isupper(*p))
3646 cased = 1;
3647 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649}
3650
3651
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003652PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003653"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003655Return True if S is a titlecased string and there is at least one\n\
3656character in S, i.e. uppercase characters may only follow uncased\n\
3657characters and lowercase characters only cased ones. Return False\n\
3658otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659
3660static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003661string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003662{
Fred Drakeba096332000-07-09 07:04:36 +00003663 register const unsigned char *p
3664 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003665 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666 int cased, previous_is_cased;
3667
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 /* Shortcut for single character strings */
3669 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003672 /* Special case for empty strings */
3673 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003674 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003675
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676 e = p + PyString_GET_SIZE(self);
3677 cased = 0;
3678 previous_is_cased = 0;
3679 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003680 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681
3682 if (isupper(ch)) {
3683 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003684 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003685 previous_is_cased = 1;
3686 cased = 1;
3687 }
3688 else if (islower(ch)) {
3689 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003690 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691 previous_is_cased = 1;
3692 cased = 1;
3693 }
3694 else
3695 previous_is_cased = 0;
3696 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003697 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698}
3699
3700
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003701PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003702"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703\n\
3704Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003705Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003706is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708static PyObject*
3709string_splitlines(PyStringObject *self, PyObject *args)
3710{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003711 register Py_ssize_t i;
3712 register Py_ssize_t j;
3713 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003714 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715 PyObject *list;
3716 PyObject *str;
3717 char *data;
3718
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003719 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720 return NULL;
3721
3722 data = PyString_AS_STRING(self);
3723 len = PyString_GET_SIZE(self);
3724
Thomas Wouters477c8d52006-05-27 19:21:47 +00003725 /* This does not use the preallocated list because splitlines is
3726 usually run with hundreds of newlines. The overhead of
3727 switching between PyList_SET_ITEM and append causes about a
3728 2-3% slowdown for that common case. A smarter implementation
3729 could move the if check out, so the SET_ITEMs are done first
3730 and the appends only done when the prealloc buffer is full.
3731 That's too much work for little gain.*/
3732
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733 list = PyList_New(0);
3734 if (!list)
3735 goto onError;
3736
3737 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003738 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003739
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740 /* Find a line and append it */
3741 while (i < len && data[i] != '\n' && data[i] != '\r')
3742 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743
3744 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003745 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746 if (i < len) {
3747 if (data[i] == '\r' && i + 1 < len &&
3748 data[i+1] == '\n')
3749 i += 2;
3750 else
3751 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003752 if (keepends)
3753 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003754 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003755 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756 j = i;
3757 }
3758 if (j < len) {
3759 SPLIT_APPEND(data, j, len);
3760 }
3761
3762 return list;
3763
3764 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003765 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766 return NULL;
3767}
3768
3769#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003770#undef SPLIT_ADD
3771#undef MAX_PREALLOC
3772#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003774static PyObject *
3775string_getnewargs(PyStringObject *v)
3776{
3777 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3778}
3779
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003780
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003781static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003782string_methods[] = {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003783 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3784 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003785 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003786 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3787 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003788 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3789 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3790 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3791 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3792 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3793 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3794 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003795 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3796 capitalize__doc__},
3797 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3798 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3799 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003800 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003801 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3802 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3803 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3804 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3805 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3806 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3807 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003808 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3809 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003810 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3811 startswith__doc__},
3812 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3813 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3814 swapcase__doc__},
3815 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3816 translate__doc__},
3817 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3818 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3819 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3820 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3821 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3822 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3823 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3824 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3825 expandtabs__doc__},
3826 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3827 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003828 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003829 {NULL, NULL} /* sentinel */
3830};
3831
Jeremy Hylton938ace62002-07-17 16:30:39 +00003832static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003833str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3834
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003835static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003836string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003837{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003838 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003839 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003840
Guido van Rossumae960af2001-08-30 03:11:59 +00003841 if (type != &PyString_Type)
3842 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003843 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3844 return NULL;
3845 if (x == NULL)
3846 return PyString_FromString("");
3847 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003848}
3849
Guido van Rossumae960af2001-08-30 03:11:59 +00003850static PyObject *
3851str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3852{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003853 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003854 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003855
3856 assert(PyType_IsSubtype(type, &PyString_Type));
3857 tmp = string_new(&PyString_Type, args, kwds);
3858 if (tmp == NULL)
3859 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003860 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003861 n = PyString_GET_SIZE(tmp);
3862 pnew = type->tp_alloc(type, n);
3863 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003864 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003865 ((PyStringObject *)pnew)->ob_shash =
3866 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003867 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003868 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003869 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003870 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003871}
3872
Guido van Rossumcacfc072002-05-24 19:01:59 +00003873static PyObject *
3874basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3875{
3876 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003877 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003878 return NULL;
3879}
3880
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003881static PyObject *
3882string_mod(PyObject *v, PyObject *w)
3883{
3884 if (!PyString_Check(v)) {
3885 Py_INCREF(Py_NotImplemented);
3886 return Py_NotImplemented;
3887 }
3888 return PyString_Format(v, w);
3889}
3890
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003891PyDoc_STRVAR(basestring_doc,
3892"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003893
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003894static PyNumberMethods string_as_number = {
3895 0, /*nb_add*/
3896 0, /*nb_subtract*/
3897 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003898 string_mod, /*nb_remainder*/
3899};
3900
3901
Guido van Rossumcacfc072002-05-24 19:01:59 +00003902PyTypeObject PyBaseString_Type = {
3903 PyObject_HEAD_INIT(&PyType_Type)
3904 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003905 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003906 0,
3907 0,
3908 0, /* tp_dealloc */
3909 0, /* tp_print */
3910 0, /* tp_getattr */
3911 0, /* tp_setattr */
3912 0, /* tp_compare */
3913 0, /* tp_repr */
3914 0, /* tp_as_number */
3915 0, /* tp_as_sequence */
3916 0, /* tp_as_mapping */
3917 0, /* tp_hash */
3918 0, /* tp_call */
3919 0, /* tp_str */
3920 0, /* tp_getattro */
3921 0, /* tp_setattro */
3922 0, /* tp_as_buffer */
3923 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3924 basestring_doc, /* tp_doc */
3925 0, /* tp_traverse */
3926 0, /* tp_clear */
3927 0, /* tp_richcompare */
3928 0, /* tp_weaklistoffset */
3929 0, /* tp_iter */
3930 0, /* tp_iternext */
3931 0, /* tp_methods */
3932 0, /* tp_members */
3933 0, /* tp_getset */
3934 &PyBaseObject_Type, /* tp_base */
3935 0, /* tp_dict */
3936 0, /* tp_descr_get */
3937 0, /* tp_descr_set */
3938 0, /* tp_dictoffset */
3939 0, /* tp_init */
3940 0, /* tp_alloc */
3941 basestring_new, /* tp_new */
3942 0, /* tp_free */
3943};
3944
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003945PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003946"str(object) -> string\n\
3947\n\
3948Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003949If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003950
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003951static PyObject *str_iter(PyObject *seq);
3952
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003953PyTypeObject PyString_Type = {
3954 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003955 0,
Walter Dörwald5d7a7002007-05-03 20:49:27 +00003956 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003957 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003958 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003959 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003960 (printfunc)string_print, /* tp_print */
3961 0, /* tp_getattr */
3962 0, /* tp_setattr */
3963 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003964 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003965 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003966 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003967 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003968 (hashfunc)string_hash, /* tp_hash */
3969 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003970 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003971 PyObject_GenericGetAttr, /* tp_getattro */
3972 0, /* tp_setattro */
3973 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003974 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3975 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003976 string_doc, /* tp_doc */
3977 0, /* tp_traverse */
3978 0, /* tp_clear */
3979 (richcmpfunc)string_richcompare, /* tp_richcompare */
3980 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003981 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003982 0, /* tp_iternext */
3983 string_methods, /* tp_methods */
3984 0, /* tp_members */
3985 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003986 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003987 0, /* tp_dict */
3988 0, /* tp_descr_get */
3989 0, /* tp_descr_set */
3990 0, /* tp_dictoffset */
3991 0, /* tp_init */
3992 0, /* tp_alloc */
3993 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003994 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003995};
3996
3997void
Fred Drakeba096332000-07-09 07:04:36 +00003998PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003999{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004000 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004001 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004002 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004003 if (w == NULL || !PyString_Check(*pv)) {
4004 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004005 *pv = NULL;
4006 return;
4007 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004008 v = string_concat((PyStringObject *) *pv, w);
4009 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004010 *pv = v;
4011}
4012
Guido van Rossum013142a1994-08-30 08:19:36 +00004013void
Fred Drakeba096332000-07-09 07:04:36 +00004014PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004015{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004016 PyString_Concat(pv, w);
4017 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004018}
4019
4020
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004021/* The following function breaks the notion that strings are immutable:
4022 it changes the size of a string. We get away with this only if there
4023 is only one module referencing the object. You can also think of it
4024 as creating a new string object and destroying the old one, only
4025 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004026 already be known to some other part of the code...
4027 Note that if there's not enough memory to resize the string, the original
4028 string object at *pv is deallocated, *pv is set to NULL, an "out of
4029 memory" exception is set, and -1 is returned. Else (on success) 0 is
4030 returned, and the value in *pv may or may not be the same as on input.
4031 As always, an extra byte is allocated for a trailing \0 byte (newsize
4032 does *not* include that), and a trailing \0 byte is stored.
4033*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004034
4035int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004036_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004037{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004038 register PyObject *v;
4039 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004040 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004041 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4042 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004043 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004044 Py_DECREF(v);
4045 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004046 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004047 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004048 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004049 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004050 _Py_ForgetReference(v);
4051 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004052 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004053 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004054 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004055 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004056 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004057 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004058 _Py_NewReference(*pv);
4059 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004060 sv->ob_size = newsize;
4061 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004062 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004063 return 0;
4064}
Guido van Rossume5372401993-03-16 12:15:04 +00004065
4066/* Helpers for formatstring */
4067
Thomas Wouters477c8d52006-05-27 19:21:47 +00004068Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004069getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004070{
Thomas Wouters977485d2006-02-16 15:59:12 +00004071 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004072 if (argidx < arglen) {
4073 (*p_argidx)++;
4074 if (arglen < 0)
4075 return args;
4076 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004077 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004078 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004079 PyErr_SetString(PyExc_TypeError,
4080 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004081 return NULL;
4082}
4083
Tim Peters38fd5b62000-09-21 05:43:11 +00004084/* Format codes
4085 * F_LJUST '-'
4086 * F_SIGN '+'
4087 * F_BLANK ' '
4088 * F_ALT '#'
4089 * F_ZERO '0'
4090 */
Guido van Rossume5372401993-03-16 12:15:04 +00004091#define F_LJUST (1<<0)
4092#define F_SIGN (1<<1)
4093#define F_BLANK (1<<2)
4094#define F_ALT (1<<3)
4095#define F_ZERO (1<<4)
4096
Thomas Wouters477c8d52006-05-27 19:21:47 +00004097Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004098formatfloat(char *buf, size_t buflen, int flags,
4099 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004100{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004101 /* fmt = '%#.' + `prec` + `type`
4102 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004103 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004104 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004105 x = PyFloat_AsDouble(v);
4106 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004107 PyErr_Format(PyExc_TypeError, "float argument required, "
4108 "not %.200s", v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004109 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004110 }
Guido van Rossume5372401993-03-16 12:15:04 +00004111 if (prec < 0)
4112 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004113 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4114 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004115 /* Worst case length calc to ensure no buffer overrun:
4116
4117 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004118 fmt = %#.<prec>g
4119 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004120 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004121 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004122
4123 'f' formats:
4124 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4125 len = 1 + 50 + 1 + prec = 52 + prec
4126
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004127 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004128 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004129
4130 */
4131 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4132 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004133 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004134 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004135 return -1;
4136 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004137 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4138 (flags&F_ALT) ? "#" : "",
4139 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004140 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004141 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004142}
4143
Tim Peters38fd5b62000-09-21 05:43:11 +00004144/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4145 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4146 * Python's regular ints.
4147 * Return value: a new PyString*, or NULL if error.
4148 * . *pbuf is set to point into it,
4149 * *plen set to the # of chars following that.
4150 * Caller must decref it when done using pbuf.
4151 * The string starting at *pbuf is of the form
4152 * "-"? ("0x" | "0X")? digit+
4153 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004154 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004155 * There will be at least prec digits, zero-filled on the left if
4156 * necessary to get that many.
4157 * val object to be converted
4158 * flags bitmask of format flags; only F_ALT is looked at
4159 * prec minimum number of digits; 0-fill on left if needed
4160 * type a character in [duoxX]; u acts the same as d
4161 *
4162 * CAUTION: o, x and X conversions on regular ints can never
4163 * produce a '-' sign, but can for Python's unbounded ints.
4164 */
4165PyObject*
4166_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4167 char **pbuf, int *plen)
4168{
4169 PyObject *result = NULL;
4170 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004171 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004172 int sign; /* 1 if '-', else 0 */
4173 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004174 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004175 int numdigits; /* len == numnondigits + numdigits */
4176 int numnondigits = 0;
4177
Guido van Rossumddefaf32007-01-14 03:31:43 +00004178 /* Avoid exceeding SSIZE_T_MAX */
4179 if (prec > PY_SSIZE_T_MAX-3) {
4180 PyErr_SetString(PyExc_OverflowError,
4181 "precision too large");
4182 return NULL;
4183 }
4184
4185
Tim Peters38fd5b62000-09-21 05:43:11 +00004186 switch (type) {
4187 case 'd':
4188 case 'u':
4189 result = val->ob_type->tp_str(val);
4190 break;
4191 case 'o':
4192 result = val->ob_type->tp_as_number->nb_oct(val);
4193 break;
4194 case 'x':
4195 case 'X':
4196 numnondigits = 2;
4197 result = val->ob_type->tp_as_number->nb_hex(val);
4198 break;
4199 default:
4200 assert(!"'type' not in [duoxX]");
4201 }
4202 if (!result)
4203 return NULL;
4204
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004205 buf = PyString_AsString(result);
4206 if (!buf) {
4207 Py_DECREF(result);
4208 return NULL;
4209 }
4210
Tim Peters38fd5b62000-09-21 05:43:11 +00004211 /* To modify the string in-place, there can only be one reference. */
4212 if (result->ob_refcnt != 1) {
4213 PyErr_BadInternalCall();
4214 return NULL;
4215 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004216 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004217 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004218 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4219 return NULL;
4220 }
4221 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004222 if (buf[len-1] == 'L') {
4223 --len;
4224 buf[len] = '\0';
4225 }
4226 sign = buf[0] == '-';
4227 numnondigits += sign;
4228 numdigits = len - numnondigits;
4229 assert(numdigits > 0);
4230
Tim Petersfff53252001-04-12 18:38:48 +00004231 /* Get rid of base marker unless F_ALT */
4232 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004233 /* Need to skip 0x, 0X or 0. */
4234 int skipped = 0;
4235 switch (type) {
4236 case 'o':
4237 assert(buf[sign] == '0');
4238 /* If 0 is only digit, leave it alone. */
4239 if (numdigits > 1) {
4240 skipped = 1;
4241 --numdigits;
4242 }
4243 break;
4244 case 'x':
4245 case 'X':
4246 assert(buf[sign] == '0');
4247 assert(buf[sign + 1] == 'x');
4248 skipped = 2;
4249 numnondigits -= 2;
4250 break;
4251 }
4252 if (skipped) {
4253 buf += skipped;
4254 len -= skipped;
4255 if (sign)
4256 buf[0] = '-';
4257 }
4258 assert(len == numnondigits + numdigits);
4259 assert(numdigits > 0);
4260 }
4261
4262 /* Fill with leading zeroes to meet minimum width. */
4263 if (prec > numdigits) {
4264 PyObject *r1 = PyString_FromStringAndSize(NULL,
4265 numnondigits + prec);
4266 char *b1;
4267 if (!r1) {
4268 Py_DECREF(result);
4269 return NULL;
4270 }
4271 b1 = PyString_AS_STRING(r1);
4272 for (i = 0; i < numnondigits; ++i)
4273 *b1++ = *buf++;
4274 for (i = 0; i < prec - numdigits; i++)
4275 *b1++ = '0';
4276 for (i = 0; i < numdigits; i++)
4277 *b1++ = *buf++;
4278 *b1 = '\0';
4279 Py_DECREF(result);
4280 result = r1;
4281 buf = PyString_AS_STRING(result);
4282 len = numnondigits + prec;
4283 }
4284
4285 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004286 if (type == 'X') {
4287 /* Need to convert all lower case letters to upper case.
4288 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004289 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004290 if (buf[i] >= 'a' && buf[i] <= 'x')
4291 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004292 }
4293 *pbuf = buf;
4294 *plen = len;
4295 return result;
4296}
4297
Thomas Wouters477c8d52006-05-27 19:21:47 +00004298Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004299formatint(char *buf, size_t buflen, int flags,
4300 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004301{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004302 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004303 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4304 + 1 + 1 = 24 */
4305 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004306 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004307 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004308
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004309 x = PyInt_AsLong(v);
4310 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004311 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4312 v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004313 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004314 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004315 if (x < 0 && type == 'u') {
4316 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004317 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004318 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4319 sign = "-";
4320 else
4321 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004322 if (prec < 0)
4323 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004324
4325 if ((flags & F_ALT) &&
4326 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004327 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004328 * of issues that cause pain:
4329 * - when 0 is being converted, the C standard leaves off
4330 * the '0x' or '0X', which is inconsistent with other
4331 * %#x/%#X conversions and inconsistent with Python's
4332 * hex() function
4333 * - there are platforms that violate the standard and
4334 * convert 0 with the '0x' or '0X'
4335 * (Metrowerks, Compaq Tru64)
4336 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004337 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004338 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004339 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004340 * We can achieve the desired consistency by inserting our
4341 * own '0x' or '0X' prefix, and substituting %x/%X in place
4342 * of %#x/%#X.
4343 *
4344 * Note that this is the same approach as used in
4345 * formatint() in unicodeobject.c
4346 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004347 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4348 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004349 }
4350 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004351 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4352 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004353 prec, type);
4354 }
4355
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004356 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4357 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004358 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004359 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004360 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004361 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004362 return -1;
4363 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004364 if (sign[0])
4365 PyOS_snprintf(buf, buflen, fmt, -x);
4366 else
4367 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004368 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004369}
4370
Thomas Wouters477c8d52006-05-27 19:21:47 +00004371Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004372formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004373{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004374 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004375 if (PyString_Check(v)) {
4376 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004377 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004378 }
4379 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004380 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004381 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004382 }
4383 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004384 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004385}
4386
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004387/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4388
4389 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4390 chars are formatted. XXX This is a magic number. Each formatting
4391 routine does bounds checking to ensure no overflow, but a better
4392 solution may be to malloc a buffer of appropriate size for each
4393 format. For now, the current solution is sufficient.
4394*/
4395#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004396
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004397PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004398PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004399{
4400 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004401 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004402 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004403 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004404 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004405 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004406 PyObject *dict = NULL;
4407 if (format == NULL || !PyString_Check(format) || args == NULL) {
4408 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004409 return NULL;
4410 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004411 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004412 fmt = PyString_AS_STRING(format);
4413 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004414 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004415 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004416 if (result == NULL)
4417 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004418 res = PyString_AsString(result);
4419 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004420 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004421 argidx = 0;
4422 }
4423 else {
4424 arglen = -1;
4425 argidx = -2;
4426 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004427 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4428 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004429 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004430 while (--fmtcnt >= 0) {
4431 if (*fmt != '%') {
4432 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004433 rescnt = fmtcnt + 100;
4434 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004435 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004436 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004437 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004438 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004439 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004440 }
4441 *res++ = *fmt++;
4442 }
4443 else {
4444 /* Got a format specifier */
4445 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004446 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004447 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004448 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004449 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004450 PyObject *v = NULL;
4451 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004452 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004453 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004454 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004455 char formatbuf[FORMATBUFLEN];
4456 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00004457 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004458 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004459
Guido van Rossumda9c2711996-12-05 21:58:58 +00004460 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004461 if (*fmt == '(') {
4462 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004463 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004464 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004465 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004466
4467 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004468 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004469 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004470 goto error;
4471 }
4472 ++fmt;
4473 --fmtcnt;
4474 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004475 /* Skip over balanced parentheses */
4476 while (pcount > 0 && --fmtcnt >= 0) {
4477 if (*fmt == ')')
4478 --pcount;
4479 else if (*fmt == '(')
4480 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004481 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004482 }
4483 keylen = fmt - keystart - 1;
4484 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004485 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004486 "incomplete format key");
4487 goto error;
4488 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004489 key = PyString_FromStringAndSize(keystart,
4490 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004491 if (key == NULL)
4492 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004493 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004494 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004495 args_owned = 0;
4496 }
4497 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004498 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004499 if (args == NULL) {
4500 goto error;
4501 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004502 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004503 arglen = -1;
4504 argidx = -2;
4505 }
Guido van Rossume5372401993-03-16 12:15:04 +00004506 while (--fmtcnt >= 0) {
4507 switch (c = *fmt++) {
4508 case '-': flags |= F_LJUST; continue;
4509 case '+': flags |= F_SIGN; continue;
4510 case ' ': flags |= F_BLANK; continue;
4511 case '#': flags |= F_ALT; continue;
4512 case '0': flags |= F_ZERO; continue;
4513 }
4514 break;
4515 }
4516 if (c == '*') {
4517 v = getnextarg(args, arglen, &argidx);
4518 if (v == NULL)
4519 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 if (!PyInt_Check(v)) {
4521 PyErr_SetString(PyExc_TypeError,
4522 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004523 goto error;
4524 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004525 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004526 if (width == -1 && PyErr_Occurred())
4527 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004528 if (width < 0) {
4529 flags |= F_LJUST;
4530 width = -width;
4531 }
Guido van Rossume5372401993-03-16 12:15:04 +00004532 if (--fmtcnt >= 0)
4533 c = *fmt++;
4534 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004535 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004536 width = c - '0';
4537 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004538 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004539 if (!isdigit(c))
4540 break;
4541 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004542 PyErr_SetString(
4543 PyExc_ValueError,
4544 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004545 goto error;
4546 }
4547 width = width*10 + (c - '0');
4548 }
4549 }
4550 if (c == '.') {
4551 prec = 0;
4552 if (--fmtcnt >= 0)
4553 c = *fmt++;
4554 if (c == '*') {
4555 v = getnextarg(args, arglen, &argidx);
4556 if (v == NULL)
4557 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004558 if (!PyInt_Check(v)) {
4559 PyErr_SetString(
4560 PyExc_TypeError,
4561 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004562 goto error;
4563 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004564 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004565 if (prec == -1 && PyErr_Occurred())
4566 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004567 if (prec < 0)
4568 prec = 0;
4569 if (--fmtcnt >= 0)
4570 c = *fmt++;
4571 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004572 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004573 prec = c - '0';
4574 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004575 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004576 if (!isdigit(c))
4577 break;
4578 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004579 PyErr_SetString(
4580 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004581 "prec too big");
4582 goto error;
4583 }
4584 prec = prec*10 + (c - '0');
4585 }
4586 }
4587 } /* prec */
4588 if (fmtcnt >= 0) {
4589 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004590 if (--fmtcnt >= 0)
4591 c = *fmt++;
4592 }
4593 }
4594 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004595 PyErr_SetString(PyExc_ValueError,
4596 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004597 goto error;
4598 }
4599 if (c != '%') {
4600 v = getnextarg(args, arglen, &argidx);
4601 if (v == NULL)
4602 goto error;
4603 }
4604 sign = 0;
4605 fill = ' ';
4606 switch (c) {
4607 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004608 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004609 len = 1;
4610 break;
4611 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00004612 if (PyUnicode_Check(v)) {
4613 fmt = fmt_start;
4614 argidx = argidx_start;
4615 goto unicode;
4616 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00004617 temp = _PyObject_Str(v);
4618 if (temp != NULL && PyUnicode_Check(temp)) {
4619 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004620 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004621 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004622 goto unicode;
4623 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004624 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004625 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004626 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00004627 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004628 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004629 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004630 if (!PyString_Check(temp)) {
4631 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004632 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004633 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004634 goto error;
4635 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004636 pbuf = PyString_AS_STRING(temp);
4637 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004638 if (prec >= 0 && len > prec)
4639 len = prec;
4640 break;
4641 case 'i':
4642 case 'd':
4643 case 'u':
4644 case 'o':
4645 case 'x':
4646 case 'X':
4647 if (c == 'i')
4648 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004649 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004650 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004651 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004652 prec, c, &pbuf, &ilen);
4653 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004654 if (!temp)
4655 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004656 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004657 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004658 else {
4659 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004660 len = formatint(pbuf,
4661 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004662 flags, prec, c, v);
4663 if (len < 0)
4664 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004665 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004666 }
4667 if (flags & F_ZERO)
4668 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004669 break;
4670 case 'e':
4671 case 'E':
4672 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004673 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004674 case 'g':
4675 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004676 if (c == 'F')
4677 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004678 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004679 len = formatfloat(pbuf, sizeof(formatbuf),
4680 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004681 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004682 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004683 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004684 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004685 fill = '0';
4686 break;
4687 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004688 if (PyUnicode_Check(v)) {
4689 fmt = fmt_start;
4690 argidx = argidx_start;
4691 goto unicode;
4692 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004693 pbuf = formatbuf;
4694 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004695 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004696 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004697 break;
4698 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004699 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004700 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004701 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004702 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004703 (Py_ssize_t)(fmt - 1 -
4704 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004705 goto error;
4706 }
4707 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004708 if (*pbuf == '-' || *pbuf == '+') {
4709 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004710 len--;
4711 }
4712 else if (flags & F_SIGN)
4713 sign = '+';
4714 else if (flags & F_BLANK)
4715 sign = ' ';
4716 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004717 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004718 }
4719 if (width < len)
4720 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004721 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004722 reslen -= rescnt;
4723 rescnt = width + fmtcnt + 100;
4724 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004725 if (reslen < 0) {
4726 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004727 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004728 return PyErr_NoMemory();
4729 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004730 if (_PyString_Resize(&result, reslen) < 0) {
4731 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004732 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004733 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004734 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004735 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004736 }
4737 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004738 if (fill != ' ')
4739 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004740 rescnt--;
4741 if (width > len)
4742 width--;
4743 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004744 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4745 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004746 assert(pbuf[1] == c);
4747 if (fill != ' ') {
4748 *res++ = *pbuf++;
4749 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004750 }
Tim Petersfff53252001-04-12 18:38:48 +00004751 rescnt -= 2;
4752 width -= 2;
4753 if (width < 0)
4754 width = 0;
4755 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004756 }
4757 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004758 do {
4759 --rescnt;
4760 *res++ = fill;
4761 } while (--width > len);
4762 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004763 if (fill == ' ') {
4764 if (sign)
4765 *res++ = sign;
4766 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004767 (c == 'x' || c == 'X')) {
4768 assert(pbuf[0] == '0');
4769 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004770 *res++ = *pbuf++;
4771 *res++ = *pbuf++;
4772 }
4773 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004774 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004775 res += len;
4776 rescnt -= len;
4777 while (--width >= len) {
4778 --rescnt;
4779 *res++ = ' ';
4780 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004781 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004782 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004783 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004784 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004785 goto error;
4786 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004787 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004788 } /* '%' */
4789 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004790 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004791 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004792 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004793 goto error;
4794 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004795 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004796 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004797 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004798 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004799 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004800
4801 unicode:
4802 if (args_owned) {
4803 Py_DECREF(args);
4804 args_owned = 0;
4805 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004806 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004807 if (PyTuple_Check(orig_args) && argidx > 0) {
4808 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004809 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004810 v = PyTuple_New(n);
4811 if (v == NULL)
4812 goto error;
4813 while (--n >= 0) {
4814 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4815 Py_INCREF(w);
4816 PyTuple_SET_ITEM(v, n, w);
4817 }
4818 args = v;
4819 } else {
4820 Py_INCREF(orig_args);
4821 args = orig_args;
4822 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004823 args_owned = 1;
4824 /* Take what we have of the result and let the Unicode formatting
4825 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004826 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004827 if (_PyString_Resize(&result, rescnt))
4828 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004829 fmtcnt = PyString_GET_SIZE(format) - \
4830 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004831 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4832 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004833 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004834 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004835 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004836 if (v == NULL)
4837 goto error;
4838 /* Paste what we have (result) to what the Unicode formatting
4839 function returned (v) and return the result (or error) */
4840 w = PyUnicode_Concat(result, v);
4841 Py_DECREF(result);
4842 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004843 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004844 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004845
Guido van Rossume5372401993-03-16 12:15:04 +00004846 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004847 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004848 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004849 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004850 }
Guido van Rossume5372401993-03-16 12:15:04 +00004851 return NULL;
4852}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004853
Guido van Rossum2a61e741997-01-18 07:55:05 +00004854void
Fred Drakeba096332000-07-09 07:04:36 +00004855PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004856{
4857 register PyStringObject *s = (PyStringObject *)(*p);
4858 PyObject *t;
4859 if (s == NULL || !PyString_Check(s))
4860 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004861 /* If it's a string subclass, we don't really know what putting
4862 it in the interned dict might do. */
4863 if (!PyString_CheckExact(s))
4864 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004865 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004866 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004867 if (interned == NULL) {
4868 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004869 if (interned == NULL) {
4870 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004871 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004872 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004873 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004874 t = PyDict_GetItem(interned, (PyObject *)s);
4875 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004876 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004877 Py_DECREF(*p);
4878 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004879 return;
4880 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004881
Armin Rigo79f7ad22004-08-07 19:27:39 +00004882 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004883 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004884 return;
4885 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004886 /* The two references in interned are not counted by refcnt.
4887 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004888 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004889 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004890}
4891
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004892void
4893PyString_InternImmortal(PyObject **p)
4894{
4895 PyString_InternInPlace(p);
4896 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4897 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4898 Py_INCREF(*p);
4899 }
4900}
4901
Guido van Rossum2a61e741997-01-18 07:55:05 +00004902
4903PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004904PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004905{
4906 PyObject *s = PyString_FromString(cp);
4907 if (s == NULL)
4908 return NULL;
4909 PyString_InternInPlace(&s);
4910 return s;
4911}
4912
Guido van Rossum8cf04761997-08-02 02:57:45 +00004913void
Fred Drakeba096332000-07-09 07:04:36 +00004914PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004915{
4916 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004917 for (i = 0; i < UCHAR_MAX + 1; i++) {
4918 Py_XDECREF(characters[i]);
4919 characters[i] = NULL;
4920 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004921 Py_XDECREF(nullstring);
4922 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004923}
Barry Warsawa903ad982001-02-23 16:40:48 +00004924
Barry Warsawa903ad982001-02-23 16:40:48 +00004925void _Py_ReleaseInternedStrings(void)
4926{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004927 PyObject *keys;
4928 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004929 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004930 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004931
4932 if (interned == NULL || !PyDict_Check(interned))
4933 return;
4934 keys = PyDict_Keys(interned);
4935 if (keys == NULL || !PyList_Check(keys)) {
4936 PyErr_Clear();
4937 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004938 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004939
4940 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4941 detector, interned strings are not forcibly deallocated; rather, we
4942 give them their stolen references back, and then clear and DECREF
4943 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004944
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004945 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004946 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4947 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004948 for (i = 0; i < n; i++) {
4949 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4950 switch (s->ob_sstate) {
4951 case SSTATE_NOT_INTERNED:
4952 /* XXX Shouldn't happen */
4953 break;
4954 case SSTATE_INTERNED_IMMORTAL:
4955 s->ob_refcnt += 1;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004956 immortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004957 break;
4958 case SSTATE_INTERNED_MORTAL:
4959 s->ob_refcnt += 2;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004960 mortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004961 break;
4962 default:
4963 Py_FatalError("Inconsistent interned string state.");
4964 }
4965 s->ob_sstate = SSTATE_NOT_INTERNED;
4966 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004967 fprintf(stderr, "total size of all interned strings: "
4968 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4969 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004970 Py_DECREF(keys);
4971 PyDict_Clear(interned);
4972 Py_DECREF(interned);
4973 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004974}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004975
4976
4977/*********************** Str Iterator ****************************/
4978
4979typedef struct {
4980 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00004981 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004982 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4983} striterobject;
4984
4985static void
4986striter_dealloc(striterobject *it)
4987{
4988 _PyObject_GC_UNTRACK(it);
4989 Py_XDECREF(it->it_seq);
4990 PyObject_GC_Del(it);
4991}
4992
4993static int
4994striter_traverse(striterobject *it, visitproc visit, void *arg)
4995{
4996 Py_VISIT(it->it_seq);
4997 return 0;
4998}
4999
5000static PyObject *
5001striter_next(striterobject *it)
5002{
5003 PyStringObject *seq;
5004 PyObject *item;
5005
5006 assert(it != NULL);
5007 seq = it->it_seq;
5008 if (seq == NULL)
5009 return NULL;
5010 assert(PyString_Check(seq));
5011
5012 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005013 item = PyString_FromStringAndSize(
5014 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005015 if (item != NULL)
5016 ++it->it_index;
5017 return item;
5018 }
5019
5020 Py_DECREF(seq);
5021 it->it_seq = NULL;
5022 return NULL;
5023}
5024
5025static PyObject *
5026striter_len(striterobject *it)
5027{
5028 Py_ssize_t len = 0;
5029 if (it->it_seq)
5030 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
5031 return PyInt_FromSsize_t(len);
5032}
5033
Guido van Rossum49d6b072006-08-17 21:11:47 +00005034PyDoc_STRVAR(length_hint_doc,
5035 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005036
5037static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005038 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
5039 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005040 {NULL, NULL} /* sentinel */
5041};
5042
5043PyTypeObject PyStringIter_Type = {
5044 PyObject_HEAD_INIT(&PyType_Type)
5045 0, /* ob_size */
Guido van Rossum49d6b072006-08-17 21:11:47 +00005046 "striterator", /* tp_name */
5047 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005048 0, /* tp_itemsize */
5049 /* methods */
5050 (destructor)striter_dealloc, /* tp_dealloc */
5051 0, /* tp_print */
5052 0, /* tp_getattr */
5053 0, /* tp_setattr */
5054 0, /* tp_compare */
5055 0, /* tp_repr */
5056 0, /* tp_as_number */
5057 0, /* tp_as_sequence */
5058 0, /* tp_as_mapping */
5059 0, /* tp_hash */
5060 0, /* tp_call */
5061 0, /* tp_str */
5062 PyObject_GenericGetAttr, /* tp_getattro */
5063 0, /* tp_setattro */
5064 0, /* tp_as_buffer */
5065 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5066 0, /* tp_doc */
5067 (traverseproc)striter_traverse, /* tp_traverse */
5068 0, /* tp_clear */
5069 0, /* tp_richcompare */
5070 0, /* tp_weaklistoffset */
5071 PyObject_SelfIter, /* tp_iter */
5072 (iternextfunc)striter_next, /* tp_iternext */
5073 striter_methods, /* tp_methods */
5074 0,
5075};
5076
5077static PyObject *
5078str_iter(PyObject *seq)
5079{
5080 striterobject *it;
5081
5082 if (!PyString_Check(seq)) {
5083 PyErr_BadInternalCall();
5084 return NULL;
5085 }
5086 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5087 if (it == NULL)
5088 return NULL;
5089 it->it_index = 0;
5090 Py_INCREF(seq);
5091 it->it_seq = (PyStringObject *)seq;
5092 _PyObject_GC_TRACK(it);
5093 return (PyObject *)it;
5094}