blob: dcecd6f60a87b113fe244022a73208daba5bc556 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
417 v->ob_type->tp_name);
418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
490 v->ob_type->tp_name);
491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
510 op->ob_refcnt = 3;
511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000522 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000609 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 && isxdigit(Py_CHARMASK(s[1]))) {
611 unsigned int x = 0;
612 c = Py_CHARMASK(*s);
613 s++;
614 if (isdigit(c))
615 x = c - '0';
616 else if (islower(c))
617 x = 10 + c - 'a';
618 else
619 x = 10 + c - 'A';
620 x = x << 4;
621 c = Py_CHARMASK(*s);
622 s++;
623 if (isdigit(c))
624 x += c - '0';
625 else if (islower(c))
626 x += 10 + c - 'a';
627 else
628 x += 10 + c - 'A';
629 *p++ = x;
630 break;
631 }
632 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000633 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000635 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000636 }
637 if (strcmp(errors, "replace") == 0) {
638 *p++ = '?';
639 } else if (strcmp(errors, "ignore") == 0)
640 /* do nothing */;
641 else {
642 PyErr_Format(PyExc_ValueError,
643 "decoding error; "
644 "unknown error handling code: %.400s",
645 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000646 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 default:
649 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000650 s--;
651 goto non_esc; /* an arbitry number of unescaped
652 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 }
654 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000655 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 return v;
658 failed:
659 Py_DECREF(v);
660 return NULL;
661}
662
Thomas Wouters477c8d52006-05-27 19:21:47 +0000663/* -------------------------------------------------------------------- */
664/* object api */
665
Martin v. Löwis18e16552006-02-15 17:27:45 +0000666static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667string_getsize(register PyObject *op)
668{
669 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000670 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000671 if (PyString_AsStringAndSize(op, &s, &len))
672 return -1;
673 return len;
674}
675
676static /*const*/ char *
677string_getbuffer(register PyObject *op)
678{
679 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000680 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000681 if (PyString_AsStringAndSize(op, &s, &len))
682 return NULL;
683 return s;
684}
685
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000687PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000688{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000689 if (PyUnicode_Check(op)) {
690 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
691 if (!op)
692 return -1;
693 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694 if (!PyString_Check(op))
695 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000696 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697}
698
699/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000700PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000702 if (PyUnicode_Check(op)) {
703 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
704 if (!op)
705 return NULL;
706 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (!PyString_Check(op))
708 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000709 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000710}
711
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712int
713PyString_AsStringAndSize(register PyObject *obj,
714 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716{
717 if (s == NULL) {
718 PyErr_BadInternalCall();
719 return -1;
720 }
721
722 if (!PyString_Check(obj)) {
723 if (PyUnicode_Check(obj)) {
724 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
725 if (obj == NULL)
726 return -1;
727 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000728 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000729 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000731 "expected string, "
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000732 "%.200s found", obj->ob_type->tp_name);
733 return -1;
734 }
735 }
736
737 *s = PyString_AS_STRING(obj);
738 if (len != NULL)
739 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000740 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 PyErr_SetString(PyExc_TypeError,
742 "expected string without null bytes");
743 return -1;
744 }
745 return 0;
746}
747
Thomas Wouters477c8d52006-05-27 19:21:47 +0000748/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749/* Methods */
750
Thomas Wouters477c8d52006-05-27 19:21:47 +0000751#define STRINGLIB_CHAR char
752
753#define STRINGLIB_CMP memcmp
754#define STRINGLIB_LEN PyString_GET_SIZE
755#define STRINGLIB_NEW PyString_FromStringAndSize
756#define STRINGLIB_STR PyString_AS_STRING
757
758#define STRINGLIB_EMPTY nullstring
759
760#include "stringlib/fastsearch.h"
761
762#include "stringlib/count.h"
763#include "stringlib/find.h"
764#include "stringlib/partition.h"
765
766
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000767static int
Fred Drakeba096332000-07-09 07:04:36 +0000768string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000770 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000771 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000772 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000773
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000774 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000775 if (! PyString_CheckExact(op)) {
776 int ret;
777 /* A str subclass may have its own __str__ method. */
778 op = (PyStringObject *) PyObject_Str((PyObject *)op);
779 if (op == NULL)
780 return -1;
781 ret = string_print(op, fp, flags);
782 Py_DECREF(op);
783 return ret;
784 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000785 if (flags & Py_PRINT_RAW) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000786 char *data = op->ob_sval;
787 Py_ssize_t size = op->ob_size;
788 while (size > INT_MAX) {
789 /* Very long strings cannot be written atomically.
790 * But don't write exactly INT_MAX bytes at a time
791 * to avoid memory aligment issues.
792 */
793 const int chunk_size = INT_MAX & ~0x3FFF;
794 fwrite(data, 1, chunk_size, fp);
795 data += chunk_size;
796 size -= chunk_size;
797 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000798#ifdef __VMS
Thomas Wouters89f507f2006-12-13 04:49:30 +0000799 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000800#else
Thomas Wouters89f507f2006-12-13 04:49:30 +0000801 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000802#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000803 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000805
Thomas Wouters7e474022000-07-16 12:04:32 +0000806 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000807 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000808 if (memchr(op->ob_sval, '\'', op->ob_size) &&
809 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000810 quote = '"';
811
812 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 for (i = 0; i < op->ob_size; i++) {
814 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000816 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000817 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000818 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000819 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000820 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000821 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000822 fprintf(fp, "\\r");
823 else if (c < ' ' || c >= 0x7f)
824 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000825 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000829 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830}
831
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000832PyObject *
833PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000835 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000836 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis5b222132007-06-10 09:51:05 +0000837 Py_ssize_t length = PyString_GET_SIZE(op);
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000838 size_t newsize = 3 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000839 PyObject *v;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000840 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000841 PyErr_SetString(PyExc_OverflowError,
842 "string is too large to make repr");
843 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000844 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000846 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 }
848 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000849 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000850 register Py_UNICODE c;
851 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000852 int quote;
853
Thomas Wouters7e474022000-07-16 12:04:32 +0000854 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000856 if (smartquotes) {
857 Py_UNICODE *test;
858 for (test = p; test < p+length; ++test) {
859 if (*test == '"') {
860 quote = '\''; /* switch back to single quote */
861 goto decided;
862 }
863 else if (*test == '\'')
864 quote = '"';
865 }
866 decided:
867 ;
868 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000869
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000870 *p++ = 's', *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000872 /* There's at least enough room for a hex escape
873 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000874 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000875 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000876 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000878 else if (c == '\t')
879 *p++ = '\\', *p++ = 't';
880 else if (c == '\n')
881 *p++ = '\\', *p++ = 'n';
882 else if (c == '\r')
883 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000884 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000885 *p++ = '\\';
886 *p++ = 'x';
887 *p++ = hexdigits[(c & 0xf0) >> 4];
888 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000889 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000890 else
891 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000893 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000894 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000896 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
897 Py_DECREF(v);
898 return NULL;
899 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000900 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000902}
903
Guido van Rossum189f1df2001-05-01 16:51:53 +0000904static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000905string_repr(PyObject *op)
906{
907 return PyString_Repr(op, 1);
908}
909
910static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000911string_str(PyObject *s)
912{
Tim Petersc9933152001-10-16 20:18:24 +0000913 assert(PyString_Check(s));
914 if (PyString_CheckExact(s)) {
915 Py_INCREF(s);
916 return s;
917 }
918 else {
919 /* Subtype -- return genuine string with the same value. */
920 PyStringObject *t = (PyStringObject *) s;
921 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
922 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000923}
924
Martin v. Löwis18e16552006-02-15 17:27:45 +0000925static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000926string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927{
928 return a->ob_size;
929}
930
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000931static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000932string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000934 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000935 register PyStringObject *op;
936 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000937 if (PyUnicode_Check(bb))
938 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000939 if (PyBytes_Check(bb))
940 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000941 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000942 "cannot concatenate 'str8' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000943 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000944 return NULL;
945 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000947 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000948 if ((a->ob_size == 0 || b->ob_size == 0) &&
949 PyString_CheckExact(a) && PyString_CheckExact(b)) {
950 if (a->ob_size == 0) {
951 Py_INCREF(bb);
952 return bb;
953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 Py_INCREF(a);
955 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000956 }
957 size = a->ob_size + b->ob_size;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000958 if (size < 0) {
959 PyErr_SetString(PyExc_OverflowError,
960 "strings are too large to concat");
961 return NULL;
962 }
963
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000964 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000965 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000966 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000968 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000969 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000970 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000971 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
972 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000973 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000974 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000975#undef b
976}
977
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000978static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000979string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000980{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000981 register Py_ssize_t i;
982 register Py_ssize_t j;
983 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000985 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000986 if (n < 0)
987 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000988 /* watch out for overflows: the size can overflow int,
989 * and the # of bytes needed can overflow size_t
990 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000991 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000992 if (n && size / n != a->ob_size) {
993 PyErr_SetString(PyExc_OverflowError,
994 "repeated string is too long");
995 return NULL;
996 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000997 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000998 Py_INCREF(a);
999 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001000 }
Tim Peterse7c05322004-06-27 17:24:49 +00001001 nbytes = (size_t)size;
1002 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001003 PyErr_SetString(PyExc_OverflowError,
1004 "repeated string is too long");
1005 return NULL;
1006 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001007 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001008 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001009 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001010 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001011 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001012 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001013 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001014 op->ob_sval[size] = '\0';
1015 if (a->ob_size == 1 && n > 0) {
1016 memset(op->ob_sval, a->ob_sval[0] , n);
1017 return (PyObject *) op;
1018 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001019 i = 0;
1020 if (i < size) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001021 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001022 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001023 }
1024 while (i < size) {
1025 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001026 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001027 i += j;
1028 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001029 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001030}
1031
1032/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1033
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001034static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001035string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001036 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001037 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001038{
1039 if (i < 0)
1040 i = 0;
1041 if (j < 0)
1042 j = 0; /* Avoid signed/unsigned bug in next line */
1043 if (j > a->ob_size)
1044 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001045 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1046 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001047 Py_INCREF(a);
1048 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001049 }
1050 if (j < i)
1051 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001052 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001053}
1054
Guido van Rossum9284a572000-03-07 15:53:43 +00001055static int
Thomas Wouters477c8d52006-05-27 19:21:47 +00001056string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001057{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001058 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001059 if (PyUnicode_Check(sub_obj))
1060 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001061 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001062 PyErr_Format(PyExc_TypeError,
1063 "'in <string>' requires string as left operand, "
1064 "not %.200s", sub_obj->ob_type->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001065 return -1;
1066 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001067 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001068
Thomas Wouters477c8d52006-05-27 19:21:47 +00001069 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001070}
1071
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001072static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001074{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001075 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001076 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001077 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001078 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001079 return NULL;
1080 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001081 pchar = a->ob_sval[i];
1082 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001083 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001084 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001085 else {
1086#ifdef COUNT_ALLOCS
1087 one_strings++;
1088#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001089 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001090 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001091 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001092}
1093
Martin v. Löwiscd353062001-05-24 16:56:35 +00001094static PyObject*
1095string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001096{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001097 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001098 Py_ssize_t len_a, len_b;
1099 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001100 PyObject *result;
1101
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001102 /* Make sure both arguments are strings. */
1103 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001104 result = Py_NotImplemented;
1105 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001106 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001107 if (a == b) {
1108 switch (op) {
1109 case Py_EQ:case Py_LE:case Py_GE:
1110 result = Py_True;
1111 goto out;
1112 case Py_NE:case Py_LT:case Py_GT:
1113 result = Py_False;
1114 goto out;
1115 }
1116 }
1117 if (op == Py_EQ) {
1118 /* Supporting Py_NE here as well does not save
1119 much time, since Py_NE is rarely used. */
1120 if (a->ob_size == b->ob_size
1121 && (a->ob_sval[0] == b->ob_sval[0]
Thomas Wouters27d517b2007-02-25 20:39:11 +00001122 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 result = Py_True;
1124 } else {
1125 result = Py_False;
1126 }
1127 goto out;
1128 }
1129 len_a = a->ob_size; len_b = b->ob_size;
1130 min_len = (len_a < len_b) ? len_a : len_b;
1131 if (min_len > 0) {
1132 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1133 if (c==0)
1134 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001135 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001136 c = 0;
1137 if (c == 0)
1138 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1139 switch (op) {
1140 case Py_LT: c = c < 0; break;
1141 case Py_LE: c = c <= 0; break;
1142 case Py_EQ: assert(0); break; /* unreachable */
1143 case Py_NE: c = c != 0; break;
1144 case Py_GT: c = c > 0; break;
1145 case Py_GE: c = c >= 0; break;
1146 default:
1147 result = Py_NotImplemented;
1148 goto out;
1149 }
1150 result = c ? Py_True : Py_False;
1151 out:
1152 Py_INCREF(result);
1153 return result;
1154}
1155
1156int
1157_PyString_Eq(PyObject *o1, PyObject *o2)
1158{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001159 PyStringObject *a = (PyStringObject*) o1;
1160 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001161 return a->ob_size == b->ob_size
1162 && *a->ob_sval == *b->ob_sval
1163 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001164}
1165
Guido van Rossum9bfef441993-03-29 10:43:31 +00001166static long
Fred Drakeba096332000-07-09 07:04:36 +00001167string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001168{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001169 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001170 register unsigned char *p;
1171 register long x;
1172
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001173 if (a->ob_shash != -1)
1174 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001175 len = a->ob_size;
1176 p = (unsigned char *) a->ob_sval;
1177 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001179 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180 x ^= a->ob_size;
1181 if (x == -1)
1182 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001183 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001184 return x;
1185}
1186
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001187static PyObject*
1188string_subscript(PyStringObject* self, PyObject* item)
1189{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001190 if (PyIndex_Check(item)) {
1191 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001192 if (i == -1 && PyErr_Occurred())
1193 return NULL;
1194 if (i < 0)
1195 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001196 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001197 }
1198 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001199 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001200 char* source_buf;
1201 char* result_buf;
1202 PyObject* result;
1203
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001204 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001205 PyString_GET_SIZE(self),
1206 &start, &stop, &step, &slicelength) < 0) {
1207 return NULL;
1208 }
1209
1210 if (slicelength <= 0) {
1211 return PyString_FromStringAndSize("", 0);
1212 }
1213 else {
1214 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001215 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001216 if (result_buf == NULL)
1217 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001218
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001219 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 cur += step, i++) {
1221 result_buf[i] = source_buf[cur];
1222 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001223
1224 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001225 slicelength);
1226 PyMem_Free(result_buf);
1227 return result;
1228 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001229 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001231 PyErr_Format(PyExc_TypeError,
1232 "string indices must be integers, not %.200s",
1233 item->ob_type->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001234 return NULL;
1235 }
1236}
1237
Martin v. Löwis18e16552006-02-15 17:27:45 +00001238static Py_ssize_t
1239string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001240{
1241 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001242 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001243 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001244 return -1;
1245 }
1246 *ptr = (void *)self->ob_sval;
1247 return self->ob_size;
1248}
1249
Martin v. Löwis18e16552006-02-15 17:27:45 +00001250static Py_ssize_t
1251string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001252{
Guido van Rossum045e6881997-09-08 18:30:11 +00001253 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001254 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001255 return -1;
1256}
1257
Martin v. Löwis18e16552006-02-15 17:27:45 +00001258static Py_ssize_t
1259string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001260{
1261 if ( lenp )
1262 *lenp = self->ob_size;
1263 return 1;
1264}
1265
Martin v. Löwis18e16552006-02-15 17:27:45 +00001266static Py_ssize_t
1267string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001268{
1269 if ( index != 0 ) {
1270 PyErr_SetString(PyExc_SystemError,
1271 "accessing non-existent string segment");
1272 return -1;
1273 }
1274 *ptr = self->ob_sval;
1275 return self->ob_size;
1276}
1277
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001278static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001279 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001280 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001281 (ssizeargfunc)string_repeat, /*sq_repeat*/
1282 (ssizeargfunc)string_item, /*sq_item*/
1283 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001284 0, /*sq_ass_item*/
1285 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001286 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001287};
1288
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001289static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001290 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001291 (binaryfunc)string_subscript,
1292 0,
1293};
1294
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001295static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001296 (readbufferproc)string_buffer_getreadbuf,
1297 (writebufferproc)string_buffer_getwritebuf,
1298 (segcountproc)string_buffer_getsegcount,
1299 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001300};
1301
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302
1303
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* The method name: the format string with its 3-character "|O:" prefix
   skipped. */
#define STRIPNAME(i) (stripformat[(i)]+3)


/* Don't call if length < 2.
   Compares the first and last characters before falling back to memcmp
   on the interior.  Every argument is evaluated more than once, so pass
   side-effect-free expressions only. */
#define Py_STRING_MATCH(target, offset, pattern, length)              \
    ((target)[(offset)] == (pattern)[0] &&                            \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&        \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Create the substring data[left:right] and append it to `list`.
   Relies on the caller's locals `str` and `list` and on an `onError`
   label.  Expands to several statements, so it must not be used as the
   unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but stores directly into the preallocated slots
   while `count` is below MAX_PREALLOC and appends afterwards; `count`
   is incremented on success.  Relies on the caller's locals `str`,
   `list` and `count` and on an `onError` label. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (((PyListObject *)(list))->ob_size = count)

/* Advance (or, for the RSKIP variants, move back) the index variable `i`
   over whitespace / non-whitespace characters of `s`. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1370
1371Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001372split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001373{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001374 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001375 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001376 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001377
1378 if (list == NULL)
1379 return NULL;
1380
Thomas Wouters477c8d52006-05-27 19:21:47 +00001381 i = j = 0;
1382
1383 while (maxsplit-- > 0) {
1384 SKIP_SPACE(s, i, len);
1385 if (i==len) break;
1386 j = i; i++;
1387 SKIP_NONSPACE(s, i, len);
1388 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001390
1391 if (i < len) {
1392 /* Only occurs when maxsplit was reached */
1393 /* Skip any remaining whitespace and copy to end of string */
1394 SKIP_SPACE(s, i, len);
1395 if (i != len)
1396 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001397 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001398 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001400 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 Py_DECREF(list);
1402 return NULL;
1403}
1404
Thomas Wouters477c8d52006-05-27 19:21:47 +00001405Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001406split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001407{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001408 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001409 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001410 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001411
1412 if (list == NULL)
1413 return NULL;
1414
Thomas Wouters477c8d52006-05-27 19:21:47 +00001415 i = j = 0;
1416 while ((j < len) && (maxcount-- > 0)) {
1417 for(; j<len; j++) {
1418 /* I found that using memchr makes no difference */
1419 if (s[j] == ch) {
1420 SPLIT_ADD(s, i, j);
1421 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001422 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001423 }
1424 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001425 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001426 if (i <= len) {
1427 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001428 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001429 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001430 return list;
1431
1432 onError:
1433 Py_DECREF(list);
1434 return NULL;
1435}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001437PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438"S.split([sep [,maxsplit]]) -> list of strings\n\
1439\n\
1440Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001441delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001442splits are done. If sep is not specified or is None, any\n\
1443whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444
1445static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001446string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001448 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001449 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001450 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001451 PyObject *list, *str, *subobj = Py_None;
1452#ifdef USE_FAST
1453 Py_ssize_t pos;
1454#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001456 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001458 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001459 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001460 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001462 if (PyString_Check(subobj)) {
1463 sub = PyString_AS_STRING(subobj);
1464 n = PyString_GET_SIZE(subobj);
1465 }
1466 else if (PyUnicode_Check(subobj))
1467 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1468 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1469 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001470
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471 if (n == 0) {
1472 PyErr_SetString(PyExc_ValueError, "empty separator");
1473 return NULL;
1474 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001475 else if (n == 1)
1476 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001477
Thomas Wouters477c8d52006-05-27 19:21:47 +00001478 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479 if (list == NULL)
1480 return NULL;
1481
Thomas Wouters477c8d52006-05-27 19:21:47 +00001482#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001483 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001484 while (maxsplit-- > 0) {
1485 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1486 if (pos < 0)
1487 break;
1488 j = i+pos;
1489 SPLIT_ADD(s, i, j);
1490 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001492#else
1493 i = j = 0;
1494 while ((j+n <= len) && (maxsplit-- > 0)) {
1495 for (; j+n <= len; j++) {
1496 if (Py_STRING_MATCH(s, j, sub, n)) {
1497 SPLIT_ADD(s, i, j);
1498 i = j = j + n;
1499 break;
1500 }
1501 }
1502 }
1503#endif
1504 SPLIT_ADD(s, i, len);
1505 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 return list;
1507
Thomas Wouters477c8d52006-05-27 19:21:47 +00001508 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001509 Py_DECREF(list);
1510 return NULL;
1511}
1512
Thomas Wouters477c8d52006-05-27 19:21:47 +00001513PyDoc_STRVAR(partition__doc__,
1514"S.partition(sep) -> (head, sep, tail)\n\
1515\n\
1516Searches for the separator sep in S, and returns the part before it,\n\
1517the separator itself, and the part after it. If the separator is not\n\
1518found, returns S and two empty strings.");
1519
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001520static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001521string_partition(PyStringObject *self, PyObject *sep_obj)
1522{
1523 const char *sep;
1524 Py_ssize_t sep_len;
1525
1526 if (PyString_Check(sep_obj)) {
1527 sep = PyString_AS_STRING(sep_obj);
1528 sep_len = PyString_GET_SIZE(sep_obj);
1529 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001530 else if (PyUnicode_Check(sep_obj))
1531 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001532 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1533 return NULL;
1534
1535 return stringlib_partition(
1536 (PyObject*) self,
1537 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1538 sep_obj, sep, sep_len
1539 );
1540}
1541
1542PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001543"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001544\n\
1545Searches for the separator sep in S, starting at the end of S, and returns\n\
1546the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001547separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001548
1549static PyObject *
1550string_rpartition(PyStringObject *self, PyObject *sep_obj)
1551{
1552 const char *sep;
1553 Py_ssize_t sep_len;
1554
1555 if (PyString_Check(sep_obj)) {
1556 sep = PyString_AS_STRING(sep_obj);
1557 sep_len = PyString_GET_SIZE(sep_obj);
1558 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001559 else if (PyUnicode_Check(sep_obj))
1560 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001561 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1562 return NULL;
1563
1564 return stringlib_rpartition(
1565 (PyObject*) self,
1566 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1567 sep_obj, sep, sep_len
1568 );
1569}
1570
1571Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001572rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001573{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001574 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001575 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001576 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001577
1578 if (list == NULL)
1579 return NULL;
1580
Thomas Wouters477c8d52006-05-27 19:21:47 +00001581 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001582
Thomas Wouters477c8d52006-05-27 19:21:47 +00001583 while (maxsplit-- > 0) {
1584 RSKIP_SPACE(s, i);
1585 if (i<0) break;
1586 j = i; i--;
1587 RSKIP_NONSPACE(s, i);
1588 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001589 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001590 if (i >= 0) {
1591 /* Only occurs when maxsplit was reached */
1592 /* Skip any remaining whitespace and copy to beginning of string */
1593 RSKIP_SPACE(s, i);
1594 if (i >= 0)
1595 SPLIT_ADD(s, 0, i + 1);
1596
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001597 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001598 FIX_PREALLOC_SIZE(list);
1599 if (PyList_Reverse(list) < 0)
1600 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001601 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001602 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001603 Py_DECREF(list);
1604 return NULL;
1605}
1606
Thomas Wouters477c8d52006-05-27 19:21:47 +00001607Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001608rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001609{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001610 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001611 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001612 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001613
1614 if (list == NULL)
1615 return NULL;
1616
Thomas Wouters477c8d52006-05-27 19:21:47 +00001617 i = j = len - 1;
1618 while ((i >= 0) && (maxcount-- > 0)) {
1619 for (; i >= 0; i--) {
1620 if (s[i] == ch) {
1621 SPLIT_ADD(s, i + 1, j + 1);
1622 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001623 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001624 }
1625 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001626 }
1627 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001628 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001630 FIX_PREALLOC_SIZE(list);
1631 if (PyList_Reverse(list) < 0)
1632 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633 return list;
1634
1635 onError:
1636 Py_DECREF(list);
1637 return NULL;
1638}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001639
1640PyDoc_STRVAR(rsplit__doc__,
1641"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1642\n\
1643Return a list of the words in the string S, using sep as the\n\
1644delimiter string, starting at the end of the string and working\n\
1645to the front. If maxsplit is given, at most maxsplit splits are\n\
1646done. If sep is not specified or is None, any whitespace string\n\
1647is a separator.");
1648
1649static PyObject *
1650string_rsplit(PyStringObject *self, PyObject *args)
1651{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001652 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001653 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001654 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001655 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001656
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001657 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001658 return NULL;
1659 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001660 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001661 if (subobj == Py_None)
1662 return rsplit_whitespace(s, len, maxsplit);
1663 if (PyString_Check(subobj)) {
1664 sub = PyString_AS_STRING(subobj);
1665 n = PyString_GET_SIZE(subobj);
1666 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001667 else if (PyUnicode_Check(subobj))
1668 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001669 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1670 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001671
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001672 if (n == 0) {
1673 PyErr_SetString(PyExc_ValueError, "empty separator");
1674 return NULL;
1675 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001676 else if (n == 1)
1677 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678
Thomas Wouters477c8d52006-05-27 19:21:47 +00001679 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001680 if (list == NULL)
1681 return NULL;
1682
1683 j = len;
1684 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001685
Thomas Wouters477c8d52006-05-27 19:21:47 +00001686 while ( (i >= 0) && (maxsplit-- > 0) ) {
1687 for (; i>=0; i--) {
1688 if (Py_STRING_MATCH(s, i, sub, n)) {
1689 SPLIT_ADD(s, i + n, j);
1690 j = i;
1691 i -= n;
1692 break;
1693 }
1694 }
1695 }
1696 SPLIT_ADD(s, 0, j);
1697 FIX_PREALLOC_SIZE(list);
1698 if (PyList_Reverse(list) < 0)
1699 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700 return list;
1701
Thomas Wouters477c8d52006-05-27 19:21:47 +00001702onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001703 Py_DECREF(list);
1704 return NULL;
1705}
1706
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001708PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001709"S.join(sequence) -> string\n\
1710\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001711Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001712sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001713
1714static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001715string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001716{
1717 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001718 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001721 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001722 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001723 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001724 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725
Tim Peters19fe14e2001-01-19 03:03:47 +00001726 seq = PySequence_Fast(orig, "");
1727 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001728 return NULL;
1729 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001730
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001731 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001732 if (seqlen == 0) {
1733 Py_DECREF(seq);
1734 return PyString_FromString("");
1735 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001737 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001738 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1739 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001740 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001741 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001742 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001744
Raymond Hettinger674f2412004-08-23 23:23:54 +00001745 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001746 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001747 * Do a pre-pass to figure out the total amount of space we'll
1748 * need (sz), see whether any argument is absurd, and defer to
1749 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001750 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001751 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001752 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001753 item = PySequence_Fast_GET_ITEM(seq, i);
1754 if (!PyString_Check(item)){
1755 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001756 /* Defer to Unicode join.
1757 * CAUTION: There's no gurantee that the
1758 * original sequence can be iterated over
1759 * again, so we must pass seq here.
1760 */
1761 PyObject *result;
1762 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001763 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001764 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001765 }
1766 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001767 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001768 " %.80s found",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001769 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001770 Py_DECREF(seq);
1771 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001772 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001773 sz += PyString_GET_SIZE(item);
1774 if (i != 0)
1775 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001776 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001777 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001778 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001779 Py_DECREF(seq);
1780 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001782 }
1783
1784 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001785 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001786 if (res == NULL) {
1787 Py_DECREF(seq);
1788 return NULL;
1789 }
1790
1791 /* Catenate everything. */
1792 p = PyString_AS_STRING(res);
1793 for (i = 0; i < seqlen; ++i) {
1794 size_t n;
1795 item = PySequence_Fast_GET_ITEM(seq, i);
1796 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001797 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001798 p += n;
1799 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001800 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001801 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001802 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001804
Jeremy Hylton49048292000-07-11 03:28:17 +00001805 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807}
1808
Tim Peters52e155e2001-06-16 05:42:57 +00001809PyObject *
1810_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001811{
Tim Petersa7259592001-06-16 05:11:17 +00001812 assert(sep != NULL && PyString_Check(sep));
1813 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001814 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001815}
1816
Thomas Wouters477c8d52006-05-27 19:21:47 +00001817Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001818string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001819{
1820 if (*end > len)
1821 *end = len;
1822 else if (*end < 0)
1823 *end += len;
1824 if (*end < 0)
1825 *end = 0;
1826 if (*start < 0)
1827 *start += len;
1828 if (*start < 0)
1829 *start = 0;
1830}
1831
Thomas Wouters477c8d52006-05-27 19:21:47 +00001832Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001833string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001835 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001836 const char *sub;
1837 Py_ssize_t sub_len;
1838 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839
Thomas Wouters477c8d52006-05-27 19:21:47 +00001840 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1841 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001842 return -2;
1843 if (PyString_Check(subobj)) {
1844 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001845 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001846 }
1847 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001848 return PyUnicode_Find(
1849 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001850 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001851 /* XXX - the "expected a character buffer object" is pretty
1852 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853 return -2;
1854
Thomas Wouters477c8d52006-05-27 19:21:47 +00001855 if (dir > 0)
1856 return stringlib_find_slice(
1857 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1858 sub, sub_len, start, end);
1859 else
1860 return stringlib_rfind_slice(
1861 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1862 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863}
1864
1865
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001866PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867"S.find(sub [,start [,end]]) -> int\n\
1868\n\
1869Return the lowest index in S where substring sub is found,\n\
1870such that sub is contained within s[start,end]. Optional\n\
1871arguments start and end are interpreted as in slice notation.\n\
1872\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001873Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001874
1875static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001876string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001878 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879 if (result == -2)
1880 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001881 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882}
1883
1884
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001885PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886"S.index(sub [,start [,end]]) -> int\n\
1887\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001888Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889
1890static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001891string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001893 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894 if (result == -2)
1895 return NULL;
1896 if (result == -1) {
1897 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001898 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001899 return NULL;
1900 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001901 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001902}
1903
1904
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001905PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906"S.rfind(sub [,start [,end]]) -> int\n\
1907\n\
1908Return the highest index in S where substring sub is found,\n\
1909such that sub is contained within s[start,end]. Optional\n\
1910arguments start and end are interpreted as in slice notation.\n\
1911\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001912Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913
1914static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001915string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001917 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918 if (result == -2)
1919 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001920 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921}
1922
1923
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001924PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925"S.rindex(sub [,start [,end]]) -> int\n\
1926\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001927Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928
1929static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001930string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001932 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933 if (result == -2)
1934 return NULL;
1935 if (result == -1) {
1936 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001937 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938 return NULL;
1939 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001940 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941}
1942
1943
Thomas Wouters477c8d52006-05-27 19:21:47 +00001944Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001945do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1946{
1947 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001948 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001949 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001950 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1951 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001952
1953 i = 0;
1954 if (striptype != RIGHTSTRIP) {
1955 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1956 i++;
1957 }
1958 }
1959
1960 j = len;
1961 if (striptype != LEFTSTRIP) {
1962 do {
1963 j--;
1964 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1965 j++;
1966 }
1967
1968 if (i == 0 && j == len && PyString_CheckExact(self)) {
1969 Py_INCREF(self);
1970 return (PyObject*)self;
1971 }
1972 else
1973 return PyString_FromStringAndSize(s+i, j-i);
1974}
1975
1976
Thomas Wouters477c8d52006-05-27 19:21:47 +00001977Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001978do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979{
1980 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001981 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983 i = 0;
1984 if (striptype != RIGHTSTRIP) {
1985 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1986 i++;
1987 }
1988 }
1989
1990 j = len;
1991 if (striptype != LEFTSTRIP) {
1992 do {
1993 j--;
1994 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1995 j++;
1996 }
1997
Tim Peters8fa5dd02001-09-12 02:18:30 +00001998 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999 Py_INCREF(self);
2000 return (PyObject*)self;
2001 }
2002 else
2003 return PyString_FromStringAndSize(s+i, j-i);
2004}
2005
2006
Thomas Wouters477c8d52006-05-27 19:21:47 +00002007Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002008do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2009{
2010 PyObject *sep = NULL;
2011
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002012 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002013 return NULL;
2014
2015 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002016 if (PyString_Check(sep))
2017 return do_xstrip(self, striptype, sep);
2018 else if (PyUnicode_Check(sep)) {
2019 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2020 PyObject *res;
2021 if (uniself==NULL)
2022 return NULL;
2023 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2024 striptype, sep);
2025 Py_DECREF(uniself);
2026 return res;
2027 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002028 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002029 "%s arg must be None or string",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002030 STRIPNAME(striptype));
2031 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002032 }
2033
2034 return do_strip(self, striptype);
2035}
2036
2037
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002038PyDoc_STRVAR(strip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002039"S.strip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040\n\
2041Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002042whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002043If chars is given and not None, remove characters in chars instead.\n\
2044If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045
2046static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002047string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002049 if (PyTuple_GET_SIZE(args) == 0)
2050 return do_strip(self, BOTHSTRIP); /* Common case */
2051 else
2052 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053}
2054
2055
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002056PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002057"S.lstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002059Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002060If chars is given and not None, remove characters in chars instead.\n\
2061If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002062
2063static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002064string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002065{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002066 if (PyTuple_GET_SIZE(args) == 0)
2067 return do_strip(self, LEFTSTRIP); /* Common case */
2068 else
2069 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070}
2071
2072
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002073PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002074"S.rstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002076Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002077If chars is given and not None, remove characters in chars instead.\n\
2078If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079
2080static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002081string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002082{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002083 if (PyTuple_GET_SIZE(args) == 0)
2084 return do_strip(self, RIGHTSTRIP); /* Common case */
2085 else
2086 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002087}
2088
2089
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002090PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091"S.lower() -> string\n\
2092\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002093Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094
Thomas Wouters477c8d52006-05-27 19:21:47 +00002095/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2096#ifndef _tolower
2097#define _tolower tolower
2098#endif
2099
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002101string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002103 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002104 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002105 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002107 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002108 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002110
2111 s = PyString_AS_STRING(newobj);
2112
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002113 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002114
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002115 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002116 int c = Py_CHARMASK(s[i]);
2117 if (isupper(c))
2118 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002120
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002121 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002122}
2123
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002124PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125"S.upper() -> string\n\
2126\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002127Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128
Thomas Wouters477c8d52006-05-27 19:21:47 +00002129#ifndef _toupper
2130#define _toupper toupper
2131#endif
2132
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002134string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002136 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002137 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002138 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002140 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002141 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002143
2144 s = PyString_AS_STRING(newobj);
2145
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002146 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002147
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002149 int c = Py_CHARMASK(s[i]);
2150 if (islower(c))
2151 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002153
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002154 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155}
2156
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002157PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002158"S.title() -> string\n\
2159\n\
2160Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002161characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002162
2163static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002164string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002165{
2166 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002167 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002168 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002169 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002170
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002171 newobj = PyString_FromStringAndSize(NULL, n);
2172 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002173 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002174 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175 for (i = 0; i < n; i++) {
2176 int c = Py_CHARMASK(*s++);
2177 if (islower(c)) {
2178 if (!previous_is_cased)
2179 c = toupper(c);
2180 previous_is_cased = 1;
2181 } else if (isupper(c)) {
2182 if (previous_is_cased)
2183 c = tolower(c);
2184 previous_is_cased = 1;
2185 } else
2186 previous_is_cased = 0;
2187 *s_new++ = c;
2188 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002189 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190}
2191
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002192PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193"S.capitalize() -> string\n\
2194\n\
2195Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002196capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197
2198static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002199string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002200{
2201 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002202 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002203 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002205 newobj = PyString_FromStringAndSize(NULL, n);
2206 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002208 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209 if (0 < n) {
2210 int c = Py_CHARMASK(*s++);
2211 if (islower(c))
2212 *s_new = toupper(c);
2213 else
2214 *s_new = c;
2215 s_new++;
2216 }
2217 for (i = 1; i < n; i++) {
2218 int c = Py_CHARMASK(*s++);
2219 if (isupper(c))
2220 *s_new = tolower(c);
2221 else
2222 *s_new = c;
2223 s_new++;
2224 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002225 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226}
2227
2228
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002229PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230"S.count(sub[, start[, end]]) -> int\n\
2231\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002232Return the number of non-overlapping occurrences of substring sub in\n\
2233string S[start:end]. Optional arguments start and end are interpreted\n\
2234as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002235
2236static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002237string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002239 PyObject *sub_obj;
2240 const char *str = PyString_AS_STRING(self), *sub;
2241 Py_ssize_t sub_len;
2242 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243
Thomas Wouters477c8d52006-05-27 19:21:47 +00002244 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2245 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002247
Thomas Wouters477c8d52006-05-27 19:21:47 +00002248 if (PyString_Check(sub_obj)) {
2249 sub = PyString_AS_STRING(sub_obj);
2250 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002251 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002252 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002253 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002254 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002255 if (count == -1)
2256 return NULL;
2257 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002258 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002259 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002260 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261 return NULL;
2262
Thomas Wouters477c8d52006-05-27 19:21:47 +00002263 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002264
Thomas Wouters477c8d52006-05-27 19:21:47 +00002265 return PyInt_FromSsize_t(
2266 stringlib_count(str + start, end - start, sub, sub_len)
2267 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268}
2269
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002270PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271"S.swapcase() -> string\n\
2272\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002274converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275
2276static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002277string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278{
2279 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002280 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002281 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002282
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002283 newobj = PyString_FromStringAndSize(NULL, n);
2284 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002285 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002286 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287 for (i = 0; i < n; i++) {
2288 int c = Py_CHARMASK(*s++);
2289 if (islower(c)) {
2290 *s_new = toupper(c);
2291 }
2292 else if (isupper(c)) {
2293 *s_new = tolower(c);
2294 }
2295 else
2296 *s_new = c;
2297 s_new++;
2298 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002299 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300}
2301
2302
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002303PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304"S.translate(table [,deletechars]) -> string\n\
2305\n\
2306Return a copy of the string S, where all characters occurring\n\
2307in the optional argument deletechars are removed, and the\n\
2308remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002309translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310
2311static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002312string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002313{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002314 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002315 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002316 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002318 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002319 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320 PyObject *result;
2321 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002322 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002324 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002325 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327
2328 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002329 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002330 tablen = PyString_GET_SIZE(tableobj);
2331 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002332 else if (tableobj == Py_None) {
2333 table = NULL;
2334 tablen = 256;
2335 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002336 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002337 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002338 parameter; instead a mapping to None will cause characters
2339 to be deleted. */
2340 if (delobj != NULL) {
2341 PyErr_SetString(PyExc_TypeError,
2342 "deletions are implemented differently for unicode");
2343 return NULL;
2344 }
2345 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2346 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002347 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002348 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002349
Martin v. Löwis00b61272002-12-12 20:03:19 +00002350 if (tablen != 256) {
2351 PyErr_SetString(PyExc_ValueError,
2352 "translation table must be 256 characters long");
2353 return NULL;
2354 }
2355
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 if (delobj != NULL) {
2357 if (PyString_Check(delobj)) {
2358 del_table = PyString_AS_STRING(delobj);
2359 dellen = PyString_GET_SIZE(delobj);
2360 }
2361 else if (PyUnicode_Check(delobj)) {
2362 PyErr_SetString(PyExc_TypeError,
2363 "deletions are implemented differently for unicode");
2364 return NULL;
2365 }
2366 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2367 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 }
2369 else {
2370 del_table = NULL;
2371 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 }
2373
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002374 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002375 result = PyString_FromStringAndSize((char *)NULL, inlen);
2376 if (result == NULL)
2377 return NULL;
2378 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002379 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380
Guido van Rossumd8faa362007-04-27 19:54:29 +00002381 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382 /* If no deletions are required, use faster code */
2383 for (i = inlen; --i >= 0; ) {
2384 c = Py_CHARMASK(*input++);
2385 if (Py_CHARMASK((*output++ = table[c])) != c)
2386 changed = 1;
2387 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002388 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002389 return result;
2390 Py_DECREF(result);
2391 Py_INCREF(input_obj);
2392 return input_obj;
2393 }
2394
Guido van Rossumd8faa362007-04-27 19:54:29 +00002395 if (table == NULL) {
2396 for (i = 0; i < 256; i++)
2397 trans_table[i] = Py_CHARMASK(i);
2398 } else {
2399 for (i = 0; i < 256; i++)
2400 trans_table[i] = Py_CHARMASK(table[i]);
2401 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002402
2403 for (i = 0; i < dellen; i++)
2404 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2405
2406 for (i = inlen; --i >= 0; ) {
2407 c = Py_CHARMASK(*input++);
2408 if (trans_table[c] != -1)
2409 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2410 continue;
2411 changed = 1;
2412 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002413 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414 Py_DECREF(result);
2415 Py_INCREF(input_obj);
2416 return input_obj;
2417 }
2418 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002419 if (inlen > 0)
2420 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421 return result;
2422}
2423
2424
/* Direction values for the substring search helpers: findstring() and
   countstring() scan forward for a positive direction and backward for a
   negative one. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* findchar(target, target_len, c): memchr() over the first target_len
   bytes of target, with the result cast to char *.  Evaluates to NULL
   when c does not occur. */
#define findchar(target, target_len, c) \
 ((char *)memchr((const void *)(target), c, target_len))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002432
Thomas Wouters477c8d52006-05-27 19:21:47 +00002433/* String ops must return a string. */
2434/* If the object is subclass of string, create a copy */
2435Py_LOCAL(PyStringObject *)
2436return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002438 if (PyString_CheckExact(self)) {
2439 Py_INCREF(self);
2440 return self;
2441 }
2442 return (PyStringObject *)PyString_FromStringAndSize(
2443 PyString_AS_STRING(self),
2444 PyString_GET_SIZE(self));
2445}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446
Thomas Wouters477c8d52006-05-27 19:21:47 +00002447Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002448countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002449{
2450 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002451 const char *start=target;
2452 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002453
Thomas Wouters477c8d52006-05-27 19:21:47 +00002454 while ( (start=findchar(start, end-start, c)) != NULL ) {
2455 count++;
2456 if (count >= maxcount)
2457 break;
2458 start += 1;
2459 }
2460 return count;
2461}
2462
2463Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002464findstring(const char *target, Py_ssize_t target_len,
2465 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002466 Py_ssize_t start,
2467 Py_ssize_t end,
2468 int direction)
2469{
2470 if (start < 0) {
2471 start += target_len;
2472 if (start < 0)
2473 start = 0;
2474 }
2475 if (end > target_len) {
2476 end = target_len;
2477 } else if (end < 0) {
2478 end += target_len;
2479 if (end < 0)
2480 end = 0;
2481 }
2482
2483 /* zero-length substrings always match at the first attempt */
2484 if (pattern_len == 0)
2485 return (direction > 0) ? start : end;
2486
2487 end -= pattern_len;
2488
2489 if (direction < 0) {
2490 for (; end >= start; end--)
2491 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2492 return end;
2493 } else {
2494 for (; start <= end; start++)
2495 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2496 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002497 }
2498 return -1;
2499}
2500
Thomas Wouters477c8d52006-05-27 19:21:47 +00002501Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002502countstring(const char *target, Py_ssize_t target_len,
2503 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002504 Py_ssize_t start,
2505 Py_ssize_t end,
2506 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002507{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002508 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002509
Thomas Wouters477c8d52006-05-27 19:21:47 +00002510 if (start < 0) {
2511 start += target_len;
2512 if (start < 0)
2513 start = 0;
2514 }
2515 if (end > target_len) {
2516 end = target_len;
2517 } else if (end < 0) {
2518 end += target_len;
2519 if (end < 0)
2520 end = 0;
2521 }
2522
2523 /* zero-length substrings match everywhere */
2524 if (pattern_len == 0 || maxcount == 0) {
2525 if (target_len+1 < maxcount)
2526 return target_len+1;
2527 return maxcount;
2528 }
2529
2530 end -= pattern_len;
2531 if (direction < 0) {
2532 for (; (end >= start); end--)
2533 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2534 count++;
2535 if (--maxcount <= 0) break;
2536 end -= pattern_len-1;
2537 }
2538 } else {
2539 for (; (start <= end); start++)
2540 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2541 count++;
2542 if (--maxcount <= 0)
2543 break;
2544 start += pattern_len-1;
2545 }
2546 }
2547 return count;
2548}
2549
2550
2551/* Algorithms for different cases of string replacement */
2552
2553/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2554Py_LOCAL(PyStringObject *)
2555replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002556 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002557 Py_ssize_t maxcount)
2558{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002559 char *self_s, *result_s;
2560 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002561 Py_ssize_t count, i, product;
2562 PyStringObject *result;
2563
2564 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002565
Thomas Wouters477c8d52006-05-27 19:21:47 +00002566 /* 1 at the end plus 1 after every character */
2567 count = self_len+1;
2568 if (maxcount < count)
2569 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002570
Thomas Wouters477c8d52006-05-27 19:21:47 +00002571 /* Check for overflow */
2572 /* result_len = count * to_len + self_len; */
2573 product = count * to_len;
2574 if (product / to_len != count) {
2575 PyErr_SetString(PyExc_OverflowError,
2576 "replace string is too long");
2577 return NULL;
2578 }
2579 result_len = product + self_len;
2580 if (result_len < 0) {
2581 PyErr_SetString(PyExc_OverflowError,
2582 "replace string is too long");
2583 return NULL;
2584 }
2585
2586 if (! (result = (PyStringObject *)
2587 PyString_FromStringAndSize(NULL, result_len)) )
2588 return NULL;
2589
2590 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002591 result_s = PyString_AS_STRING(result);
2592
2593 /* TODO: special case single character, which doesn't need memcpy */
2594
2595 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002596 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002597 result_s += to_len;
2598 count -= 1;
2599
2600 for (i=0; i<count; i++) {
2601 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002602 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002603 result_s += to_len;
2604 }
2605
2606 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002607 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002608
2609 return result;
2610}
2611
2612/* Special case for deleting a single character */
2613/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2614Py_LOCAL(PyStringObject *)
2615replace_delete_single_character(PyStringObject *self,
2616 char from_c, Py_ssize_t maxcount)
2617{
2618 char *self_s, *result_s;
2619 char *start, *next, *end;
2620 Py_ssize_t self_len, result_len;
2621 Py_ssize_t count;
2622 PyStringObject *result;
2623
2624 self_len = PyString_GET_SIZE(self);
2625 self_s = PyString_AS_STRING(self);
2626
2627 count = countchar(self_s, self_len, from_c, maxcount);
2628 if (count == 0) {
2629 return return_self(self);
2630 }
2631
2632 result_len = self_len - count; /* from_len == 1 */
2633 assert(result_len>=0);
2634
2635 if ( (result = (PyStringObject *)
2636 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2637 return NULL;
2638 result_s = PyString_AS_STRING(result);
2639
2640 start = self_s;
2641 end = self_s + self_len;
2642 while (count-- > 0) {
2643 next = findchar(start, end-start, from_c);
2644 if (next == NULL)
2645 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002646 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002647 result_s += (next-start);
2648 start = next+1;
2649 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002650 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002651
Thomas Wouters477c8d52006-05-27 19:21:47 +00002652 return result;
2653}
2654
2655/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2656
2657Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002658replace_delete_substring(PyStringObject *self,
2659 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002660 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002661 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002662 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002663 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002664 Py_ssize_t count, offset;
2665 PyStringObject *result;
2666
2667 self_len = PyString_GET_SIZE(self);
2668 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002669
2670 count = countstring(self_s, self_len,
2671 from_s, from_len,
2672 0, self_len, 1,
2673 maxcount);
2674
2675 if (count == 0) {
2676 /* no matches */
2677 return return_self(self);
2678 }
2679
2680 result_len = self_len - (count * from_len);
2681 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002682
Thomas Wouters477c8d52006-05-27 19:21:47 +00002683 if ( (result = (PyStringObject *)
2684 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2685 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002686
Thomas Wouters477c8d52006-05-27 19:21:47 +00002687 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002688
Thomas Wouters477c8d52006-05-27 19:21:47 +00002689 start = self_s;
2690 end = self_s + self_len;
2691 while (count-- > 0) {
2692 offset = findstring(start, end-start,
2693 from_s, from_len,
2694 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002695 if (offset == -1)
2696 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002697 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002698
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002699 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002700
Thomas Wouters477c8d52006-05-27 19:21:47 +00002701 result_s += (next-start);
2702 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002703 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002704 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002705 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002706}
2707
Thomas Wouters477c8d52006-05-27 19:21:47 +00002708/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2709Py_LOCAL(PyStringObject *)
2710replace_single_character_in_place(PyStringObject *self,
2711 char from_c, char to_c,
2712 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002713{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002714 char *self_s, *result_s, *start, *end, *next;
2715 Py_ssize_t self_len;
2716 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002717
Thomas Wouters477c8d52006-05-27 19:21:47 +00002718 /* The result string will be the same size */
2719 self_s = PyString_AS_STRING(self);
2720 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002721
Thomas Wouters477c8d52006-05-27 19:21:47 +00002722 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002723
Thomas Wouters477c8d52006-05-27 19:21:47 +00002724 if (next == NULL) {
2725 /* No matches; return the original string */
2726 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002727 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002728
Thomas Wouters477c8d52006-05-27 19:21:47 +00002729 /* Need to make a new string */
2730 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2731 if (result == NULL)
2732 return NULL;
2733 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002734 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002735
Thomas Wouters477c8d52006-05-27 19:21:47 +00002736 /* change everything in-place, starting with this one */
2737 start = result_s + (next-self_s);
2738 *start = to_c;
2739 start++;
2740 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002741
Thomas Wouters477c8d52006-05-27 19:21:47 +00002742 while (--maxcount > 0) {
2743 next = findchar(start, end-start, from_c);
2744 if (next == NULL)
2745 break;
2746 *next = to_c;
2747 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002748 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002749
Thomas Wouters477c8d52006-05-27 19:21:47 +00002750 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002751}
2752
Thomas Wouters477c8d52006-05-27 19:21:47 +00002753/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2754Py_LOCAL(PyStringObject *)
2755replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002756 const char *from_s, Py_ssize_t from_len,
2757 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002758 Py_ssize_t maxcount)
2759{
2760 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002761 char *self_s;
2762 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002763 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002764
Thomas Wouters477c8d52006-05-27 19:21:47 +00002765 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002766
Thomas Wouters477c8d52006-05-27 19:21:47 +00002767 self_s = PyString_AS_STRING(self);
2768 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002769
Thomas Wouters477c8d52006-05-27 19:21:47 +00002770 offset = findstring(self_s, self_len,
2771 from_s, from_len,
2772 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002773 if (offset == -1) {
2774 /* No matches; return the original string */
2775 return return_self(self);
2776 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002777
Thomas Wouters477c8d52006-05-27 19:21:47 +00002778 /* Need to make a new string */
2779 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2780 if (result == NULL)
2781 return NULL;
2782 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002783 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002784
Thomas Wouters477c8d52006-05-27 19:21:47 +00002785 /* change everything in-place, starting with this one */
2786 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002787 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002788 start += from_len;
2789 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002790
Thomas Wouters477c8d52006-05-27 19:21:47 +00002791 while ( --maxcount > 0) {
2792 offset = findstring(start, end-start,
2793 from_s, from_len,
2794 0, end-start, FORWARD);
2795 if (offset==-1)
2796 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002797 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002798 start += offset+from_len;
2799 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002800
Thomas Wouters477c8d52006-05-27 19:21:47 +00002801 return result;
2802}
2803
2804/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2805Py_LOCAL(PyStringObject *)
2806replace_single_character(PyStringObject *self,
2807 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002808 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002809 Py_ssize_t maxcount)
2810{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002811 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002812 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002813 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002814 Py_ssize_t count, product;
2815 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002816
Thomas Wouters477c8d52006-05-27 19:21:47 +00002817 self_s = PyString_AS_STRING(self);
2818 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002819
Thomas Wouters477c8d52006-05-27 19:21:47 +00002820 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002821 if (count == 0) {
2822 /* no matches, return unchanged */
2823 return return_self(self);
2824 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002825
Thomas Wouters477c8d52006-05-27 19:21:47 +00002826 /* use the difference between current and new, hence the "-1" */
2827 /* result_len = self_len + count * (to_len-1) */
2828 product = count * (to_len-1);
2829 if (product / (to_len-1) != count) {
2830 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2831 return NULL;
2832 }
2833 result_len = self_len + product;
2834 if (result_len < 0) {
2835 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2836 return NULL;
2837 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002838
Thomas Wouters477c8d52006-05-27 19:21:47 +00002839 if ( (result = (PyStringObject *)
2840 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2841 return NULL;
2842 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002843
Thomas Wouters477c8d52006-05-27 19:21:47 +00002844 start = self_s;
2845 end = self_s + self_len;
2846 while (count-- > 0) {
2847 next = findchar(start, end-start, from_c);
2848 if (next == NULL)
2849 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002850
Thomas Wouters477c8d52006-05-27 19:21:47 +00002851 if (next == start) {
2852 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002853 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002854 result_s += to_len;
2855 start += 1;
2856 } else {
2857 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002858 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002859 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002860 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002861 result_s += to_len;
2862 start = next+1;
2863 }
2864 }
2865 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002866 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002867
Thomas Wouters477c8d52006-05-27 19:21:47 +00002868 return result;
2869}
2870
2871/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2872Py_LOCAL(PyStringObject *)
2873replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002874 const char *from_s, Py_ssize_t from_len,
2875 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002876 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002877 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002878 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002879 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002880 Py_ssize_t count, offset, product;
2881 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002882
Thomas Wouters477c8d52006-05-27 19:21:47 +00002883 self_s = PyString_AS_STRING(self);
2884 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002885
Thomas Wouters477c8d52006-05-27 19:21:47 +00002886 count = countstring(self_s, self_len,
2887 from_s, from_len,
2888 0, self_len, FORWARD, maxcount);
2889 if (count == 0) {
2890 /* no matches, return unchanged */
2891 return return_self(self);
2892 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002893
Thomas Wouters477c8d52006-05-27 19:21:47 +00002894 /* Check for overflow */
2895 /* result_len = self_len + count * (to_len-from_len) */
2896 product = count * (to_len-from_len);
2897 if (product / (to_len-from_len) != count) {
2898 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2899 return NULL;
2900 }
2901 result_len = self_len + product;
2902 if (result_len < 0) {
2903 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2904 return NULL;
2905 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002906
Thomas Wouters477c8d52006-05-27 19:21:47 +00002907 if ( (result = (PyStringObject *)
2908 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2909 return NULL;
2910 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002911
Thomas Wouters477c8d52006-05-27 19:21:47 +00002912 start = self_s;
2913 end = self_s + self_len;
2914 while (count-- > 0) {
2915 offset = findstring(start, end-start,
2916 from_s, from_len,
2917 0, end-start, FORWARD);
2918 if (offset == -1)
2919 break;
2920 next = start+offset;
2921 if (next == start) {
2922 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002923 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002924 result_s += to_len;
2925 start += from_len;
2926 } else {
2927 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002928 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002929 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002930 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002931 result_s += to_len;
2932 start = next+from_len;
2933 }
2934 }
2935 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002936 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002937
Thomas Wouters477c8d52006-05-27 19:21:47 +00002938 return result;
2939}
2940
2941
2942Py_LOCAL(PyStringObject *)
2943replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002944 const char *from_s, Py_ssize_t from_len,
2945 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002946 Py_ssize_t maxcount)
2947{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002948 if (maxcount < 0) {
2949 maxcount = PY_SSIZE_T_MAX;
2950 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2951 /* nothing to do; return the original string */
2952 return return_self(self);
2953 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002954
Thomas Wouters477c8d52006-05-27 19:21:47 +00002955 if (maxcount == 0 ||
2956 (from_len == 0 && to_len == 0)) {
2957 /* nothing to do; return the original string */
2958 return return_self(self);
2959 }
2960
2961 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002962
Thomas Wouters477c8d52006-05-27 19:21:47 +00002963 if (from_len == 0) {
2964 /* insert the 'to' string everywhere. */
2965 /* >>> "Python".replace("", ".") */
2966 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002967 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002968 }
2969
2970 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2971 /* point for an empty self string to generate a non-empty string */
2972 /* Special case so the remaining code always gets a non-empty string */
2973 if (PyString_GET_SIZE(self) == 0) {
2974 return return_self(self);
2975 }
2976
2977 if (to_len == 0) {
2978 /* delete all occurances of 'from' string */
2979 if (from_len == 1) {
2980 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002981 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002982 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002983 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002984 }
2985 }
2986
2987 /* Handle special case where both strings have the same length */
2988
2989 if (from_len == to_len) {
2990 if (from_len == 1) {
2991 return replace_single_character_in_place(
2992 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002993 from_s[0],
2994 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002995 maxcount);
2996 } else {
2997 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002998 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002999 }
3000 }
3001
3002 /* Otherwise use the more generic algorithms */
3003 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003004 return replace_single_character(self, from_s[0],
3005 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003006 } else {
3007 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003008 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003009 }
3010}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003011
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003012PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003013"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003014\n\
3015Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003016old replaced by new. If the optional argument count is\n\
3017given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003018
3019static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003020string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003021{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003022 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003023 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003024 const char *from_s, *to_s;
3025 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003026
Thomas Wouters477c8d52006-05-27 19:21:47 +00003027 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003028 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003029
Thomas Wouters477c8d52006-05-27 19:21:47 +00003030 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003031 from_s = PyString_AS_STRING(from);
3032 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003033 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003034 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003035 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003036 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003037 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003038 return NULL;
3039
Thomas Wouters477c8d52006-05-27 19:21:47 +00003040 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003041 to_s = PyString_AS_STRING(to);
3042 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003043 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003044 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003045 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003046 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003047 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003048 return NULL;
3049
Thomas Wouters477c8d52006-05-27 19:21:47 +00003050 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003051 from_s, from_len,
3052 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003053}
3054
Thomas Wouters477c8d52006-05-27 19:21:47 +00003055/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003056
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003057/* Matches the end (direction >= 0) or start (direction < 0) of self
3058 * against substr, using the start and end arguments. Returns
3059 * -1 on error, 0 if not found and 1 if found.
3060 */
3061Py_LOCAL(int)
3062_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3063 Py_ssize_t end, int direction)
3064{
3065 Py_ssize_t len = PyString_GET_SIZE(self);
3066 Py_ssize_t slen;
3067 const char* sub;
3068 const char* str;
3069
3070 if (PyString_Check(substr)) {
3071 sub = PyString_AS_STRING(substr);
3072 slen = PyString_GET_SIZE(substr);
3073 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003074 else if (PyUnicode_Check(substr))
3075 return PyUnicode_Tailmatch((PyObject *)self,
3076 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003077 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3078 return -1;
3079 str = PyString_AS_STRING(self);
3080
3081 string_adjust_indices(&start, &end, len);
3082
3083 if (direction < 0) {
3084 /* startswith */
3085 if (start+slen > len)
3086 return 0;
3087 } else {
3088 /* endswith */
3089 if (end-start < slen || start > len)
3090 return 0;
3091
3092 if (end-slen > start)
3093 start = end - slen;
3094 }
3095 if (end-start >= slen)
3096 return ! memcmp(str+start, sub, slen);
3097 return 0;
3098}
3099
3100
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003101PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003102"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003103\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003104Return True if S starts with the specified prefix, False otherwise.\n\
3105With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003106With optional end, stop comparing S at that position.\n\
3107prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003108
3109static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003110string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003111{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003112 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003113 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003115 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003116
Guido van Rossumc6821402000-05-08 14:08:05 +00003117 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3118 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003119 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003120 if (PyTuple_Check(subobj)) {
3121 Py_ssize_t i;
3122 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3123 result = _string_tailmatch(self,
3124 PyTuple_GET_ITEM(subobj, i),
3125 start, end, -1);
3126 if (result == -1)
3127 return NULL;
3128 else if (result) {
3129 Py_RETURN_TRUE;
3130 }
3131 }
3132 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003133 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003134 result = _string_tailmatch(self, subobj, start, end, -1);
3135 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003136 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003137 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003138 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003139}
3140
3141
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003142PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003143"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003144\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003145Return True if S ends with the specified suffix, False otherwise.\n\
3146With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003147With optional end, stop comparing S at that position.\n\
3148suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003149
3150static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003151string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003152{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003153 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003154 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003156 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003157
Guido van Rossumc6821402000-05-08 14:08:05 +00003158 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3159 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003160 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003161 if (PyTuple_Check(subobj)) {
3162 Py_ssize_t i;
3163 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3164 result = _string_tailmatch(self,
3165 PyTuple_GET_ITEM(subobj, i),
3166 start, end, +1);
3167 if (result == -1)
3168 return NULL;
3169 else if (result) {
3170 Py_RETURN_TRUE;
3171 }
3172 }
3173 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003174 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003175 result = _string_tailmatch(self, subobj, start, end, +1);
3176 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003177 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003178 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003179 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003180}
3181
3182
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003183PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003184"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003185\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003186Encodes S using the codec registered for encoding. encoding defaults\n\
3187to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003188handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003189a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3190'xmlcharrefreplace' as well as any other name registered with\n\
3191codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003192
3193static PyObject *
3194string_encode(PyStringObject *self, PyObject *args)
3195{
3196 char *encoding = NULL;
3197 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003198 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003199
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003200 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3201 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003202 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003203 if (v == NULL)
3204 goto onError;
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003205 if (!PyBytes_Check(v)) {
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003206 PyErr_Format(PyExc_TypeError,
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003207 "[str8] encoder did not return a bytes object "
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003208 "(type=%.400s)",
3209 v->ob_type->tp_name);
3210 Py_DECREF(v);
3211 return NULL;
3212 }
3213 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003214
3215 onError:
3216 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003217}
3218
3219
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003220PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003221"S.decode([encoding[,errors]]) -> object\n\
3222\n\
3223Decodes S using the codec registered for encoding. encoding defaults\n\
3224to the default encoding. errors may be given to set a different error\n\
3225handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003226a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3227as well as any other name registerd with codecs.register_error that is\n\
3228able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003229
3230static PyObject *
3231string_decode(PyStringObject *self, PyObject *args)
3232{
3233 char *encoding = NULL;
3234 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003235 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003236
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003237 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3238 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003239 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003240 if (v == NULL)
3241 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003242 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3243 PyErr_Format(PyExc_TypeError,
3244 "decoder did not return a string/unicode object "
3245 "(type=%.400s)",
3246 v->ob_type->tp_name);
3247 Py_DECREF(v);
3248 return NULL;
3249 }
3250 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003251
3252 onError:
3253 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003254}
3255
3256
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003257PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003258"S.expandtabs([tabsize]) -> string\n\
3259\n\
3260Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003261If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003262
3263static PyObject*
3264string_expandtabs(PyStringObject *self, PyObject *args)
3265{
3266 const char *e, *p;
3267 char *q;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003268 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003269 PyObject *u;
3270 int tabsize = 8;
3271
3272 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3273 return NULL;
3274
Thomas Wouters7e474022000-07-16 12:04:32 +00003275 /* First pass: determine size of output string */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003276 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003277 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3278 for (p = PyString_AS_STRING(self); p < e; p++)
3279 if (*p == '\t') {
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003280 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003281 j += tabsize - (j % tabsize);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003282 if (old_j > j) {
3283 PyErr_SetString(PyExc_OverflowError,
3284 "new string is too long");
3285 return NULL;
3286 }
3287 old_j = j;
3288 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003289 }
3290 else {
3291 j++;
3292 if (*p == '\n' || *p == '\r') {
3293 i += j;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003294 old_j = j = 0;
3295 if (i < 0) {
3296 PyErr_SetString(PyExc_OverflowError,
3297 "new string is too long");
3298 return NULL;
3299 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003300 }
3301 }
3302
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003303 if ((i + j) < 0) {
3304 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3305 return NULL;
3306 }
3307
Guido van Rossum4c08d552000-03-10 22:55:18 +00003308 /* Second pass: create output string and fill it */
3309 u = PyString_FromStringAndSize(NULL, i + j);
3310 if (!u)
3311 return NULL;
3312
3313 j = 0;
3314 q = PyString_AS_STRING(u);
3315
3316 for (p = PyString_AS_STRING(self); p < e; p++)
3317 if (*p == '\t') {
3318 if (tabsize > 0) {
3319 i = tabsize - (j % tabsize);
3320 j += i;
3321 while (i--)
3322 *q++ = ' ';
3323 }
3324 }
3325 else {
3326 j++;
3327 *q++ = *p;
3328 if (*p == '\n' || *p == '\r')
3329 j = 0;
3330 }
3331
3332 return u;
3333}
3334
Thomas Wouters477c8d52006-05-27 19:21:47 +00003335Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003336pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003337{
3338 PyObject *u;
3339
3340 if (left < 0)
3341 left = 0;
3342 if (right < 0)
3343 right = 0;
3344
Tim Peters8fa5dd02001-09-12 02:18:30 +00003345 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003346 Py_INCREF(self);
3347 return (PyObject *)self;
3348 }
3349
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003350 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003351 left + PyString_GET_SIZE(self) + right);
3352 if (u) {
3353 if (left)
3354 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003355 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003356 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003357 PyString_GET_SIZE(self));
3358 if (right)
3359 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3360 fill, right);
3361 }
3362
3363 return u;
3364}
3365
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003366PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003367"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003368"\n"
3369"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003370"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003371
3372static PyObject *
3373string_ljust(PyStringObject *self, PyObject *args)
3374{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003375 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003376 char fillchar = ' ';
3377
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003378 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003379 return NULL;
3380
Tim Peters8fa5dd02001-09-12 02:18:30 +00003381 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003382 Py_INCREF(self);
3383 return (PyObject*) self;
3384 }
3385
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003386 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387}
3388
3389
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003390PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003391"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003392"\n"
3393"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003394"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003395
3396static PyObject *
3397string_rjust(PyStringObject *self, PyObject *args)
3398{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003399 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003400 char fillchar = ' ';
3401
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003402 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003403 return NULL;
3404
Tim Peters8fa5dd02001-09-12 02:18:30 +00003405 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406 Py_INCREF(self);
3407 return (PyObject*) self;
3408 }
3409
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003410 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003411}
3412
3413
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003414PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003415"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003416"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003417"Return S centered in a string of length width. Padding is\n"
3418"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003419
3420static PyObject *
3421string_center(PyStringObject *self, PyObject *args)
3422{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003423 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003424 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003425 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003426
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003427 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003428 return NULL;
3429
Tim Peters8fa5dd02001-09-12 02:18:30 +00003430 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431 Py_INCREF(self);
3432 return (PyObject*) self;
3433 }
3434
3435 marg = width - PyString_GET_SIZE(self);
3436 left = marg / 2 + (marg & width & 1);
3437
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003438 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003439}
3440
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003441PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003442"S.zfill(width) -> string\n"
3443"\n"
3444"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003445"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003446
3447static PyObject *
3448string_zfill(PyStringObject *self, PyObject *args)
3449{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003450 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003451 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003452 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003453 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003454
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003455 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003456 return NULL;
3457
3458 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003459 if (PyString_CheckExact(self)) {
3460 Py_INCREF(self);
3461 return (PyObject*) self;
3462 }
3463 else
3464 return PyString_FromStringAndSize(
3465 PyString_AS_STRING(self),
3466 PyString_GET_SIZE(self)
3467 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003468 }
3469
3470 fill = width - PyString_GET_SIZE(self);
3471
3472 s = pad(self, fill, 0, '0');
3473
3474 if (s == NULL)
3475 return NULL;
3476
3477 p = PyString_AS_STRING(s);
3478 if (p[fill] == '+' || p[fill] == '-') {
3479 /* move sign to beginning of string */
3480 p[0] = p[fill];
3481 p[fill] = '0';
3482 }
3483
3484 return (PyObject*) s;
3485}
3486
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003487PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003488"S.isspace() -> bool\n\
3489\n\
3490Return True if all characters in S are whitespace\n\
3491and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003492
3493static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003494string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003495{
Fred Drakeba096332000-07-09 07:04:36 +00003496 register const unsigned char *p
3497 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003498 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003499
Guido van Rossum4c08d552000-03-10 22:55:18 +00003500 /* Shortcut for single character strings */
3501 if (PyString_GET_SIZE(self) == 1 &&
3502 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003503 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003504
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003505 /* Special case for empty strings */
3506 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003507 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003508
Guido van Rossum4c08d552000-03-10 22:55:18 +00003509 e = p + PyString_GET_SIZE(self);
3510 for (; p < e; p++) {
3511 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003512 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003513 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003514 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003515}
3516
3517
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003518PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003519"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003520\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003521Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003522and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003523
3524static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003525string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003526{
Fred Drakeba096332000-07-09 07:04:36 +00003527 register const unsigned char *p
3528 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003529 register const unsigned char *e;
3530
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003531 /* Shortcut for single character strings */
3532 if (PyString_GET_SIZE(self) == 1 &&
3533 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003534 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003535
3536 /* Special case for empty strings */
3537 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003538 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003539
3540 e = p + PyString_GET_SIZE(self);
3541 for (; p < e; p++) {
3542 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003543 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003544 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003545 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003546}
3547
3548
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003549PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003550"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003551\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003552Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003553and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003554
3555static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003556string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003557{
Fred Drakeba096332000-07-09 07:04:36 +00003558 register const unsigned char *p
3559 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003560 register const unsigned char *e;
3561
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003562 /* Shortcut for single character strings */
3563 if (PyString_GET_SIZE(self) == 1 &&
3564 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003565 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003566
3567 /* Special case for empty strings */
3568 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003569 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003570
3571 e = p + PyString_GET_SIZE(self);
3572 for (; p < e; p++) {
3573 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003574 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003575 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003576 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003577}
3578
3579
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003580PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003582\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003583Return True if all characters in S are digits\n\
3584and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003585
3586static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003587string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003588{
Fred Drakeba096332000-07-09 07:04:36 +00003589 register const unsigned char *p
3590 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003591 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003592
Guido van Rossum4c08d552000-03-10 22:55:18 +00003593 /* Shortcut for single character strings */
3594 if (PyString_GET_SIZE(self) == 1 &&
3595 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003596 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003597
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003598 /* Special case for empty strings */
3599 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003600 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003601
Guido van Rossum4c08d552000-03-10 22:55:18 +00003602 e = p + PyString_GET_SIZE(self);
3603 for (; p < e; p++) {
3604 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003605 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003606 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003607 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003608}
3609
3610
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003611PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003613\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003614Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003615at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003616
3617static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003618string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003619{
Fred Drakeba096332000-07-09 07:04:36 +00003620 register const unsigned char *p
3621 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003622 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003623 int cased;
3624
Guido van Rossum4c08d552000-03-10 22:55:18 +00003625 /* Shortcut for single character strings */
3626 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003627 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003628
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003629 /* Special case for empty strings */
3630 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003631 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003632
Guido van Rossum4c08d552000-03-10 22:55:18 +00003633 e = p + PyString_GET_SIZE(self);
3634 cased = 0;
3635 for (; p < e; p++) {
3636 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638 else if (!cased && islower(*p))
3639 cased = 1;
3640 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003641 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642}
3643
3644
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003645PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003646"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003647\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003648Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003649at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650
3651static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003652string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003653{
Fred Drakeba096332000-07-09 07:04:36 +00003654 register const unsigned char *p
3655 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003656 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657 int cased;
3658
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659 /* Shortcut for single character strings */
3660 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003662
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003663 /* Special case for empty strings */
3664 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003665 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003666
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667 e = p + PyString_GET_SIZE(self);
3668 cased = 0;
3669 for (; p < e; p++) {
3670 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003671 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672 else if (!cased && isupper(*p))
3673 cased = 1;
3674 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003675 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676}
3677
3678
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003679PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003680"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003682Return True if S is a titlecased string and there is at least one\n\
3683character in S, i.e. uppercase characters may only follow uncased\n\
3684characters and lowercase characters only cased ones. Return False\n\
3685otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686
3687static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003688string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689{
Fred Drakeba096332000-07-09 07:04:36 +00003690 register const unsigned char *p
3691 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003692 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003693 int cased, previous_is_cased;
3694
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695 /* Shortcut for single character strings */
3696 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003697 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003699 /* Special case for empty strings */
3700 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003702
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703 e = p + PyString_GET_SIZE(self);
3704 cased = 0;
3705 previous_is_cased = 0;
3706 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003707 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708
3709 if (isupper(ch)) {
3710 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003711 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 previous_is_cased = 1;
3713 cased = 1;
3714 }
3715 else if (islower(ch)) {
3716 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003717 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003718 previous_is_cased = 1;
3719 cased = 1;
3720 }
3721 else
3722 previous_is_cased = 0;
3723 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003724 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725}
3726
3727
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003728PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003729"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730\n\
3731Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003732Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003733is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735static PyObject*
3736string_splitlines(PyStringObject *self, PyObject *args)
3737{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003738 register Py_ssize_t i;
3739 register Py_ssize_t j;
3740 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003741 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742 PyObject *list;
3743 PyObject *str;
3744 char *data;
3745
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003746 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747 return NULL;
3748
3749 data = PyString_AS_STRING(self);
3750 len = PyString_GET_SIZE(self);
3751
Thomas Wouters477c8d52006-05-27 19:21:47 +00003752 /* This does not use the preallocated list because splitlines is
3753 usually run with hundreds of newlines. The overhead of
3754 switching between PyList_SET_ITEM and append causes about a
3755 2-3% slowdown for that common case. A smarter implementation
3756 could move the if check out, so the SET_ITEMs are done first
3757 and the appends only done when the prealloc buffer is full.
3758 That's too much work for little gain.*/
3759
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760 list = PyList_New(0);
3761 if (!list)
3762 goto onError;
3763
3764 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003765 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003766
Guido van Rossum4c08d552000-03-10 22:55:18 +00003767 /* Find a line and append it */
3768 while (i < len && data[i] != '\n' && data[i] != '\r')
3769 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770
3771 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003772 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773 if (i < len) {
3774 if (data[i] == '\r' && i + 1 < len &&
3775 data[i+1] == '\n')
3776 i += 2;
3777 else
3778 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003779 if (keepends)
3780 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003781 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003782 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783 j = i;
3784 }
3785 if (j < len) {
3786 SPLIT_APPEND(data, j, len);
3787 }
3788
3789 return list;
3790
3791 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003792 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003793 return NULL;
3794}
3795
3796#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003797#undef SPLIT_ADD
3798#undef MAX_PREALLOC
3799#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003801static PyObject *
3802string_getnewargs(PyStringObject *v)
3803{
3804 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3805}
3806
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003807
/* Method table for the string type; it is installed as the tp_methods
   slot of PyString_Type.  Each entry gives the Python-level name, the
   C implementation, its calling-convention flag and its docstring.
   The table ends with an all-NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    /* No docstring field is given for this entry. */
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
3858
Jeremy Hylton938ace62002-07-17 16:30:39 +00003859static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003860str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3861
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003862static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003863string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003864{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003865 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003866 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003867
Guido van Rossumae960af2001-08-30 03:11:59 +00003868 if (type != &PyString_Type)
3869 return str_subtype_new(type, args, kwds);
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003870 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
Tim Peters6d6c1a32001-08-02 04:15:00 +00003871 return NULL;
3872 if (x == NULL)
3873 return PyString_FromString("");
3874 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003875}
3876
Guido van Rossumae960af2001-08-30 03:11:59 +00003877static PyObject *
3878str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3879{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003880 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003881 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003882
3883 assert(PyType_IsSubtype(type, &PyString_Type));
3884 tmp = string_new(&PyString_Type, args, kwds);
3885 if (tmp == NULL)
3886 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003887 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003888 n = PyString_GET_SIZE(tmp);
3889 pnew = type->tp_alloc(type, n);
3890 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003891 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003892 ((PyStringObject *)pnew)->ob_shash =
3893 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003894 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003895 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003896 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003897 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003898}
3899
Guido van Rossumcacfc072002-05-24 19:01:59 +00003900static PyObject *
3901basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3902{
3903 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003904 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003905 return NULL;
3906}
3907
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003908static PyObject *
3909string_mod(PyObject *v, PyObject *w)
3910{
3911 if (!PyString_Check(v)) {
3912 Py_INCREF(Py_NotImplemented);
3913 return Py_NotImplemented;
3914 }
3915 return PyString_Format(v, w);
3916}
3917
/* Docstring used as the tp_doc slot of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str8 and str.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003920
/* Number protocol table for strings; it is installed as the
   tp_as_number slot of PyString_Type.  Only nb_remainder (the `%`
   operator) is provided.  The slots after it are not listed and are
   therefore zero-initialized. */
static PyNumberMethods string_as_number = {
    0, /*nb_add*/
    0, /*nb_subtract*/
    0, /*nb_multiply*/
    string_mod, /*nb_remainder*/
};
3927
3928
/* Type object for "basestring".  It derives from PyBaseObject_Type and
   is the tp_base of PyString_Type.  Its tp_new is basestring_new, which
   always raises TypeError.  Apart from tp_flags, tp_doc, tp_base and
   tp_new, every slot is left at 0. */
PyTypeObject PyBaseString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0, /* ob_size */
    "basestring", /* tp_name */
    0, /* tp_basicsize */
    0, /* tp_itemsize */
    0, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    0, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    &PyBaseObject_Type, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    basestring_new, /* tp_new */
    0, /* tp_free */
};
3971
/* Docstring used as the tp_doc slot of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Defined later in the file; needed here for the tp_iter slot. */
static PyObject *str_iter(PyObject *seq);

/* Type object for the byte string type, named "str8".  It is a
   variable-size type: the fixed part is sizeof(PyStringObject) and each
   item is sizeof(char).  It derives from PyBaseString_Type, is created
   through string_new and freed with PyObject_Del. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0, /* ob_size */
    "str8", /* tp_name */
    sizeof(PyStringObject), /* tp_basicsize */
    sizeof(char), /* tp_itemsize */
    string_dealloc, /* tp_dealloc */
    (printfunc)string_print, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    string_repr, /* tp_repr */
    &string_as_number, /* tp_as_number */
    &string_as_sequence, /* tp_as_sequence */
    &string_as_mapping, /* tp_as_mapping */
    (hashfunc)string_hash, /* tp_hash */
    0, /* tp_call */
    string_str, /* tp_str */
    PyObject_GenericGetAttr, /* tp_getattro */
    0, /* tp_setattro */
    &string_as_buffer, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
    string_doc, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    (richcmpfunc)string_richcompare, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    str_iter, /* tp_iter */
    0, /* tp_iternext */
    string_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    &PyBaseString_Type, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    string_new, /* tp_new */
    PyObject_Del, /* tp_free */
};
4023
4024void
Fred Drakeba096332000-07-09 07:04:36 +00004025PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004026{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004027 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004028 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004029 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004030 if (w == NULL || !PyString_Check(*pv)) {
4031 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004032 *pv = NULL;
4033 return;
4034 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004035 v = string_concat((PyStringObject *) *pv, w);
4036 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004037 *pv = v;
4038}
4039
Guido van Rossum013142a1994-08-30 08:19:36 +00004040void
Fred Drakeba096332000-07-09 07:04:36 +00004041PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004042{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004043 PyString_Concat(pv, w);
4044 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004045}
4046
4047
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004048/* The following function breaks the notion that strings are immutable:
4049 it changes the size of a string. We get away with this only if there
4050 is only one module referencing the object. You can also think of it
4051 as creating a new string object and destroying the old one, only
4052 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004053 already be known to some other part of the code...
4054 Note that if there's not enough memory to resize the string, the original
4055 string object at *pv is deallocated, *pv is set to NULL, an "out of
4056 memory" exception is set, and -1 is returned. Else (on success) 0 is
4057 returned, and the value in *pv may or may not be the same as on input.
4058 As always, an extra byte is allocated for a trailing \0 byte (newsize
4059 does *not* include that), and a trailing \0 byte is stored.
4060*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004061
4062int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004063_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004064{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004065 register PyObject *v;
4066 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004067 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004068 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4069 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004070 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004071 Py_DECREF(v);
4072 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004073 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004074 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004075 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004076 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004077 _Py_ForgetReference(v);
4078 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004079 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004080 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004081 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004082 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004083 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004085 _Py_NewReference(*pv);
4086 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004087 sv->ob_size = newsize;
4088 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004089 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004090 return 0;
4091}
Guido van Rossume5372401993-03-16 12:15:04 +00004092
4093/* Helpers for formatstring */
4094
Thomas Wouters477c8d52006-05-27 19:21:47 +00004095Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004096getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004097{
Thomas Wouters977485d2006-02-16 15:59:12 +00004098 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004099 if (argidx < arglen) {
4100 (*p_argidx)++;
4101 if (arglen < 0)
4102 return args;
4103 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004104 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004105 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004106 PyErr_SetString(PyExc_TypeError,
4107 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004108 return NULL;
4109}
4110
/* Format codes
 * F_LJUST '-'
 * F_SIGN '+'
 * F_BLANK ' '
 * F_ALT '#'
 * F_ZERO '0'
 *
 * Each flag occupies its own bit, so several can be combined in one
 * int `flags` value and tested individually with `&`.
 */
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
4123
Thomas Wouters477c8d52006-05-27 19:21:47 +00004124Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004125formatfloat(char *buf, size_t buflen, int flags,
4126 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004127{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004128 /* fmt = '%#.' + `prec` + `type`
4129 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004130 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004131 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004132 x = PyFloat_AsDouble(v);
4133 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004134 PyErr_Format(PyExc_TypeError, "float argument required, "
4135 "not %.200s", v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004136 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004137 }
Guido van Rossume5372401993-03-16 12:15:04 +00004138 if (prec < 0)
4139 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004140 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4141 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004142 /* Worst case length calc to ensure no buffer overrun:
4143
4144 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004145 fmt = %#.<prec>g
4146 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004147 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004148 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004149
4150 'f' formats:
4151 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4152 len = 1 + 50 + 1 + prec = 52 + prec
4153
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004154 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004155 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004156
4157 */
4158 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4159 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004160 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004161 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004162 return -1;
4163 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004164 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4165 (flags&F_ALT) ? "#" : "",
4166 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004167 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004168 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004169}
4170
Tim Peters38fd5b62000-09-21 05:43:11 +00004171/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4172 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4173 * Python's regular ints.
4174 * Return value: a new PyString*, or NULL if error.
4175 * . *pbuf is set to point into it,
4176 * *plen set to the # of chars following that.
4177 * Caller must decref it when done using pbuf.
4178 * The string starting at *pbuf is of the form
4179 * "-"? ("0x" | "0X")? digit+
4180 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004181 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004182 * There will be at least prec digits, zero-filled on the left if
4183 * necessary to get that many.
4184 * val object to be converted
4185 * flags bitmask of format flags; only F_ALT is looked at
4186 * prec minimum number of digits; 0-fill on left if needed
4187 * type a character in [duoxX]; u acts the same as d
4188 *
4189 * CAUTION: o, x and X conversions on regular ints can never
4190 * produce a '-' sign, but can for Python's unbounded ints.
4191 */
4192PyObject*
4193_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4194 char **pbuf, int *plen)
4195{
4196 PyObject *result = NULL;
4197 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004198 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004199 int sign; /* 1 if '-', else 0 */
4200 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004201 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004202 int numdigits; /* len == numnondigits + numdigits */
4203 int numnondigits = 0;
4204
Guido van Rossumddefaf32007-01-14 03:31:43 +00004205 /* Avoid exceeding SSIZE_T_MAX */
4206 if (prec > PY_SSIZE_T_MAX-3) {
4207 PyErr_SetString(PyExc_OverflowError,
4208 "precision too large");
4209 return NULL;
4210 }
4211
4212
Tim Peters38fd5b62000-09-21 05:43:11 +00004213 switch (type) {
4214 case 'd':
4215 case 'u':
4216 result = val->ob_type->tp_str(val);
4217 break;
4218 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004219 numnondigits = 2;
4220 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00004221 break;
4222 case 'x':
4223 case 'X':
4224 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004225 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00004226 break;
4227 default:
4228 assert(!"'type' not in [duoxX]");
4229 }
4230 if (!result)
4231 return NULL;
4232
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004233 buf = PyString_AsString(result);
4234 if (!buf) {
4235 Py_DECREF(result);
4236 return NULL;
4237 }
4238
Tim Peters38fd5b62000-09-21 05:43:11 +00004239 /* To modify the string in-place, there can only be one reference. */
4240 if (result->ob_refcnt != 1) {
4241 PyErr_BadInternalCall();
4242 return NULL;
4243 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004244 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004245 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004246 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4247 return NULL;
4248 }
4249 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004250 if (buf[len-1] == 'L') {
4251 --len;
4252 buf[len] = '\0';
4253 }
4254 sign = buf[0] == '-';
4255 numnondigits += sign;
4256 numdigits = len - numnondigits;
4257 assert(numdigits > 0);
4258
Tim Petersfff53252001-04-12 18:38:48 +00004259 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004260 if (((flags & F_ALT) == 0 &&
4261 (type == 'o' || type == 'x' || type == 'X'))) {
4262 assert(buf[sign] == '0');
4263 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
4264 buf[sign+1] == 'o');
4265 numnondigits -= 2;
4266 buf += 2;
4267 len -= 2;
4268 if (sign)
4269 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00004270 assert(len == numnondigits + numdigits);
4271 assert(numdigits > 0);
4272 }
4273
4274 /* Fill with leading zeroes to meet minimum width. */
4275 if (prec > numdigits) {
4276 PyObject *r1 = PyString_FromStringAndSize(NULL,
4277 numnondigits + prec);
4278 char *b1;
4279 if (!r1) {
4280 Py_DECREF(result);
4281 return NULL;
4282 }
4283 b1 = PyString_AS_STRING(r1);
4284 for (i = 0; i < numnondigits; ++i)
4285 *b1++ = *buf++;
4286 for (i = 0; i < prec - numdigits; i++)
4287 *b1++ = '0';
4288 for (i = 0; i < numdigits; i++)
4289 *b1++ = *buf++;
4290 *b1 = '\0';
4291 Py_DECREF(result);
4292 result = r1;
4293 buf = PyString_AS_STRING(result);
4294 len = numnondigits + prec;
4295 }
4296
4297 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004298 if (type == 'X') {
4299 /* Need to convert all lower case letters to upper case.
4300 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004301 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004302 if (buf[i] >= 'a' && buf[i] <= 'x')
4303 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004304 }
4305 *pbuf = buf;
4306 *plen = len;
4307 return result;
4308}
4309
Thomas Wouters477c8d52006-05-27 19:21:47 +00004310Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004311formatint(char *buf, size_t buflen, int flags,
4312 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004313{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004314 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004315 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4316 + 1 + 1 = 24 */
4317 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004318 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004319 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004320
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004321 x = PyInt_AsLong(v);
4322 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004323 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4324 v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004325 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004326 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004327 if (x < 0 && type == 'u') {
4328 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004329 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004330 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4331 sign = "-";
4332 else
4333 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004334 if (prec < 0)
4335 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004336
4337 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004338 (type == 'x' || type == 'X' || type == 'o')) {
4339 /* When converting under %#o, %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004340 * of issues that cause pain:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004341 * - for %#o, we want a different base marker than C
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004342 * - when 0 is being converted, the C standard leaves off
4343 * the '0x' or '0X', which is inconsistent with other
4344 * %#x/%#X conversions and inconsistent with Python's
4345 * hex() function
4346 * - there are platforms that violate the standard and
4347 * convert 0 with the '0x' or '0X'
4348 * (Metrowerks, Compaq Tru64)
4349 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004350 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004351 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004352 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004353 * We can achieve the desired consistency by inserting our
4354 * own '0x' or '0X' prefix, and substituting %x/%X in place
4355 * of %#x/%#X.
4356 *
4357 * Note that this is the same approach as used in
4358 * formatint() in unicodeobject.c
4359 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004360 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4361 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004362 }
4363 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004364 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4365 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004366 prec, type);
4367 }
4368
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004369 /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004370 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004371 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004372 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004373 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004374 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004375 return -1;
4376 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004377 if (sign[0])
4378 PyOS_snprintf(buf, buflen, fmt, -x);
4379 else
4380 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004381 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004382}
4383
Thomas Wouters477c8d52006-05-27 19:21:47 +00004384Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004385formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004386{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004387 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004388 if (PyString_Check(v)) {
4389 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004390 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004391 }
4392 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004393 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004394 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004395 }
4396 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004397 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004398}
4399
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004409
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004410PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004411PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004412{
4413 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004414 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004415 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004416 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004417 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004418 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004419 PyObject *dict = NULL;
4420 if (format == NULL || !PyString_Check(format) || args == NULL) {
4421 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004422 return NULL;
4423 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004424 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004425 fmt = PyString_AS_STRING(format);
4426 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004427 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004428 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004429 if (result == NULL)
4430 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004431 res = PyString_AsString(result);
4432 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004433 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004434 argidx = 0;
4435 }
4436 else {
4437 arglen = -1;
4438 argidx = -2;
4439 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004440 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4441 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004442 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004443 while (--fmtcnt >= 0) {
4444 if (*fmt != '%') {
4445 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004446 rescnt = fmtcnt + 100;
4447 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004448 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004449 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004450 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004451 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004452 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004453 }
4454 *res++ = *fmt++;
4455 }
4456 else {
4457 /* Got a format specifier */
4458 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004459 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004460 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004461 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004462 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004463 PyObject *v = NULL;
4464 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004465 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004466 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004467 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004468 char formatbuf[FORMATBUFLEN];
4469 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00004470 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004471 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004472
Guido van Rossumda9c2711996-12-05 21:58:58 +00004473 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004474 if (*fmt == '(') {
4475 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004476 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004477 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004478 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004479
4480 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004481 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004482 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004483 goto error;
4484 }
4485 ++fmt;
4486 --fmtcnt;
4487 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004488 /* Skip over balanced parentheses */
4489 while (pcount > 0 && --fmtcnt >= 0) {
4490 if (*fmt == ')')
4491 --pcount;
4492 else if (*fmt == '(')
4493 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004494 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004495 }
4496 keylen = fmt - keystart - 1;
4497 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004498 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004499 "incomplete format key");
4500 goto error;
4501 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004502 key = PyString_FromStringAndSize(keystart,
4503 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004504 if (key == NULL)
4505 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004506 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004507 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004508 args_owned = 0;
4509 }
4510 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004511 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004512 if (args == NULL) {
4513 goto error;
4514 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004515 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004516 arglen = -1;
4517 argidx = -2;
4518 }
Guido van Rossume5372401993-03-16 12:15:04 +00004519 while (--fmtcnt >= 0) {
4520 switch (c = *fmt++) {
4521 case '-': flags |= F_LJUST; continue;
4522 case '+': flags |= F_SIGN; continue;
4523 case ' ': flags |= F_BLANK; continue;
4524 case '#': flags |= F_ALT; continue;
4525 case '0': flags |= F_ZERO; continue;
4526 }
4527 break;
4528 }
4529 if (c == '*') {
4530 v = getnextarg(args, arglen, &argidx);
4531 if (v == NULL)
4532 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004533 if (!PyInt_Check(v)) {
4534 PyErr_SetString(PyExc_TypeError,
4535 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004536 goto error;
4537 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004538 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004539 if (width == -1 && PyErr_Occurred())
4540 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004541 if (width < 0) {
4542 flags |= F_LJUST;
4543 width = -width;
4544 }
Guido van Rossume5372401993-03-16 12:15:04 +00004545 if (--fmtcnt >= 0)
4546 c = *fmt++;
4547 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004548 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004549 width = c - '0';
4550 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004551 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004552 if (!isdigit(c))
4553 break;
4554 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004555 PyErr_SetString(
4556 PyExc_ValueError,
4557 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004558 goto error;
4559 }
4560 width = width*10 + (c - '0');
4561 }
4562 }
4563 if (c == '.') {
4564 prec = 0;
4565 if (--fmtcnt >= 0)
4566 c = *fmt++;
4567 if (c == '*') {
4568 v = getnextarg(args, arglen, &argidx);
4569 if (v == NULL)
4570 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004571 if (!PyInt_Check(v)) {
4572 PyErr_SetString(
4573 PyExc_TypeError,
4574 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004575 goto error;
4576 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004577 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004578 if (prec == -1 && PyErr_Occurred())
4579 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004580 if (prec < 0)
4581 prec = 0;
4582 if (--fmtcnt >= 0)
4583 c = *fmt++;
4584 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004585 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004586 prec = c - '0';
4587 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004588 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004589 if (!isdigit(c))
4590 break;
4591 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004592 PyErr_SetString(
4593 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004594 "prec too big");
4595 goto error;
4596 }
4597 prec = prec*10 + (c - '0');
4598 }
4599 }
4600 } /* prec */
4601 if (fmtcnt >= 0) {
4602 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004603 if (--fmtcnt >= 0)
4604 c = *fmt++;
4605 }
4606 }
4607 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004608 PyErr_SetString(PyExc_ValueError,
4609 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004610 goto error;
4611 }
4612 if (c != '%') {
4613 v = getnextarg(args, arglen, &argidx);
4614 if (v == NULL)
4615 goto error;
4616 }
4617 sign = 0;
4618 fill = ' ';
4619 switch (c) {
4620 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004621 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004622 len = 1;
4623 break;
4624 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00004625 if (PyUnicode_Check(v)) {
4626 fmt = fmt_start;
4627 argidx = argidx_start;
4628 goto unicode;
4629 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00004630 temp = _PyObject_Str(v);
4631 if (temp != NULL && PyUnicode_Check(temp)) {
4632 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004633 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004634 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004635 goto unicode;
4636 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004637 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004638 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004639 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00004640 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004641 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004642 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004643 if (!PyString_Check(temp)) {
4644 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00004645 "%s argument has non-string str()/repr()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004646 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004647 goto error;
4648 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004649 pbuf = PyString_AS_STRING(temp);
4650 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004651 if (prec >= 0 && len > prec)
4652 len = prec;
4653 break;
4654 case 'i':
4655 case 'd':
4656 case 'u':
4657 case 'o':
4658 case 'x':
4659 case 'X':
4660 if (c == 'i')
4661 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004662 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004663 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004664 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004665 prec, c, &pbuf, &ilen);
4666 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004667 if (!temp)
4668 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004669 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004670 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004671 else {
4672 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004673 len = formatint(pbuf,
4674 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004675 flags, prec, c, v);
4676 if (len < 0)
4677 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004678 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004679 }
4680 if (flags & F_ZERO)
4681 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004682 break;
4683 case 'e':
4684 case 'E':
4685 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004686 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004687 case 'g':
4688 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004689 if (c == 'F')
4690 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004691 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004692 len = formatfloat(pbuf, sizeof(formatbuf),
4693 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004694 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004695 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004696 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004697 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004698 fill = '0';
4699 break;
4700 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004701 if (PyUnicode_Check(v)) {
4702 fmt = fmt_start;
4703 argidx = argidx_start;
4704 goto unicode;
4705 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004706 pbuf = formatbuf;
4707 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004708 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004709 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004710 break;
4711 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004712 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004713 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004714 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004715 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004716 (Py_ssize_t)(fmt - 1 -
4717 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004718 goto error;
4719 }
4720 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004721 if (*pbuf == '-' || *pbuf == '+') {
4722 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004723 len--;
4724 }
4725 else if (flags & F_SIGN)
4726 sign = '+';
4727 else if (flags & F_BLANK)
4728 sign = ' ';
4729 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004730 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004731 }
4732 if (width < len)
4733 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004734 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004735 reslen -= rescnt;
4736 rescnt = width + fmtcnt + 100;
4737 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004738 if (reslen < 0) {
4739 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004740 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004741 return PyErr_NoMemory();
4742 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004743 if (_PyString_Resize(&result, reslen) < 0) {
4744 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004745 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004746 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004747 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004748 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004749 }
4750 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004751 if (fill != ' ')
4752 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004753 rescnt--;
4754 if (width > len)
4755 width--;
4756 }
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004757 if ((flags & F_ALT) &&
4758 (c == 'x' || c == 'X' || c == 'o')) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004759 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004760 assert(pbuf[1] == c);
4761 if (fill != ' ') {
4762 *res++ = *pbuf++;
4763 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004764 }
Tim Petersfff53252001-04-12 18:38:48 +00004765 rescnt -= 2;
4766 width -= 2;
4767 if (width < 0)
4768 width = 0;
4769 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004770 }
4771 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004772 do {
4773 --rescnt;
4774 *res++ = fill;
4775 } while (--width > len);
4776 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004777 if (fill == ' ') {
4778 if (sign)
4779 *res++ = sign;
4780 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004781 (c == 'x' || c == 'X' || c == 'o')) {
Tim Petersfff53252001-04-12 18:38:48 +00004782 assert(pbuf[0] == '0');
4783 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004784 *res++ = *pbuf++;
4785 *res++ = *pbuf++;
4786 }
4787 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004788 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004789 res += len;
4790 rescnt -= len;
4791 while (--width >= len) {
4792 --rescnt;
4793 *res++ = ' ';
4794 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004795 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004796 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004797 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004798 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004799 goto error;
4800 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004801 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004802 } /* '%' */
4803 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004804 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004805 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004806 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004807 goto error;
4808 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004809 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004810 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004811 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004812 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004813 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004814
4815 unicode:
4816 if (args_owned) {
4817 Py_DECREF(args);
4818 args_owned = 0;
4819 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004820 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004821 if (PyTuple_Check(orig_args) && argidx > 0) {
4822 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004823 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004824 v = PyTuple_New(n);
4825 if (v == NULL)
4826 goto error;
4827 while (--n >= 0) {
4828 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4829 Py_INCREF(w);
4830 PyTuple_SET_ITEM(v, n, w);
4831 }
4832 args = v;
4833 } else {
4834 Py_INCREF(orig_args);
4835 args = orig_args;
4836 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004837 args_owned = 1;
4838 /* Take what we have of the result and let the Unicode formatting
4839 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004840 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004841 if (_PyString_Resize(&result, rescnt))
4842 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004843 fmtcnt = PyString_GET_SIZE(format) - \
4844 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004845 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4846 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004847 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004848 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004849 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004850 if (v == NULL)
4851 goto error;
4852 /* Paste what we have (result) to what the Unicode formatting
4853 function returned (v) and return the result (or error) */
4854 w = PyUnicode_Concat(result, v);
4855 Py_DECREF(result);
4856 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004857 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004858 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004859
Guido van Rossume5372401993-03-16 12:15:04 +00004860 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004861 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004862 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004863 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004864 }
Guido van Rossume5372401993-03-16 12:15:04 +00004865 return NULL;
4866}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004867
Guido van Rossum2a61e741997-01-18 07:55:05 +00004868void
Fred Drakeba096332000-07-09 07:04:36 +00004869PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004870{
4871 register PyStringObject *s = (PyStringObject *)(*p);
4872 PyObject *t;
4873 if (s == NULL || !PyString_Check(s))
4874 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004875 /* If it's a string subclass, we don't really know what putting
4876 it in the interned dict might do. */
4877 if (!PyString_CheckExact(s))
4878 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004879 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004880 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004881 if (interned == NULL) {
4882 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004883 if (interned == NULL) {
4884 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004885 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004886 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004887 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004888 t = PyDict_GetItem(interned, (PyObject *)s);
4889 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004890 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004891 Py_DECREF(*p);
4892 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004893 return;
4894 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004895
Armin Rigo79f7ad22004-08-07 19:27:39 +00004896 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004897 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004898 return;
4899 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004900 /* The two references in interned are not counted by refcnt.
4901 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004902 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004903 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004904}
4905
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004906void
4907PyString_InternImmortal(PyObject **p)
4908{
4909 PyString_InternInPlace(p);
4910 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4911 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4912 Py_INCREF(*p);
4913 }
4914}
4915
Guido van Rossum2a61e741997-01-18 07:55:05 +00004916
4917PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004918PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004919{
4920 PyObject *s = PyString_FromString(cp);
4921 if (s == NULL)
4922 return NULL;
4923 PyString_InternInPlace(&s);
4924 return s;
4925}
4926
Guido van Rossum8cf04761997-08-02 02:57:45 +00004927void
Fred Drakeba096332000-07-09 07:04:36 +00004928PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004929{
4930 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004931 for (i = 0; i < UCHAR_MAX + 1; i++) {
4932 Py_XDECREF(characters[i]);
4933 characters[i] = NULL;
4934 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004935 Py_XDECREF(nullstring);
4936 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004937}
Barry Warsawa903ad982001-02-23 16:40:48 +00004938
Barry Warsawa903ad982001-02-23 16:40:48 +00004939void _Py_ReleaseInternedStrings(void)
4940{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004941 PyObject *keys;
4942 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004943 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004944 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004945
4946 if (interned == NULL || !PyDict_Check(interned))
4947 return;
4948 keys = PyDict_Keys(interned);
4949 if (keys == NULL || !PyList_Check(keys)) {
4950 PyErr_Clear();
4951 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004952 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004953
4954 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4955 detector, interned strings are not forcibly deallocated; rather, we
4956 give them their stolen references back, and then clear and DECREF
4957 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004958
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004959 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004960 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4961 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004962 for (i = 0; i < n; i++) {
4963 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4964 switch (s->ob_sstate) {
4965 case SSTATE_NOT_INTERNED:
4966 /* XXX Shouldn't happen */
4967 break;
4968 case SSTATE_INTERNED_IMMORTAL:
4969 s->ob_refcnt += 1;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004970 immortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004971 break;
4972 case SSTATE_INTERNED_MORTAL:
4973 s->ob_refcnt += 2;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004974 mortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004975 break;
4976 default:
4977 Py_FatalError("Inconsistent interned string state.");
4978 }
4979 s->ob_sstate = SSTATE_NOT_INTERNED;
4980 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004981 fprintf(stderr, "total size of all interned strings: "
4982 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4983 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004984 Py_DECREF(keys);
4985 PyDict_Clear(interned);
4986 Py_DECREF(interned);
4987 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004988}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004989
4990
4991/*********************** Str Iterator ****************************/
4992
4993typedef struct {
4994 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00004995 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004996 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4997} striterobject;
4998
4999static void
5000striter_dealloc(striterobject *it)
5001{
5002 _PyObject_GC_UNTRACK(it);
5003 Py_XDECREF(it->it_seq);
5004 PyObject_GC_Del(it);
5005}
5006
5007static int
5008striter_traverse(striterobject *it, visitproc visit, void *arg)
5009{
5010 Py_VISIT(it->it_seq);
5011 return 0;
5012}
5013
5014static PyObject *
5015striter_next(striterobject *it)
5016{
5017 PyStringObject *seq;
5018 PyObject *item;
5019
5020 assert(it != NULL);
5021 seq = it->it_seq;
5022 if (seq == NULL)
5023 return NULL;
5024 assert(PyString_Check(seq));
5025
5026 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005027 item = PyString_FromStringAndSize(
5028 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005029 if (item != NULL)
5030 ++it->it_index;
5031 return item;
5032 }
5033
5034 Py_DECREF(seq);
5035 it->it_seq = NULL;
5036 return NULL;
5037}
5038
5039static PyObject *
5040striter_len(striterobject *it)
5041{
5042 Py_ssize_t len = 0;
5043 if (it->it_seq)
5044 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
5045 return PyInt_FromSsize_t(len);
5046}
5047
Guido van Rossum49d6b072006-08-17 21:11:47 +00005048PyDoc_STRVAR(length_hint_doc,
5049 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005050
5051static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005052 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
5053 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005054 {NULL, NULL} /* sentinel */
5055};
5056
5057PyTypeObject PyStringIter_Type = {
5058 PyObject_HEAD_INIT(&PyType_Type)
5059 0, /* ob_size */
Guido van Rossum49d6b072006-08-17 21:11:47 +00005060 "striterator", /* tp_name */
5061 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005062 0, /* tp_itemsize */
5063 /* methods */
5064 (destructor)striter_dealloc, /* tp_dealloc */
5065 0, /* tp_print */
5066 0, /* tp_getattr */
5067 0, /* tp_setattr */
5068 0, /* tp_compare */
5069 0, /* tp_repr */
5070 0, /* tp_as_number */
5071 0, /* tp_as_sequence */
5072 0, /* tp_as_mapping */
5073 0, /* tp_hash */
5074 0, /* tp_call */
5075 0, /* tp_str */
5076 PyObject_GenericGetAttr, /* tp_getattro */
5077 0, /* tp_setattro */
5078 0, /* tp_as_buffer */
5079 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5080 0, /* tp_doc */
5081 (traverseproc)striter_traverse, /* tp_traverse */
5082 0, /* tp_clear */
5083 0, /* tp_richcompare */
5084 0, /* tp_weaklistoffset */
5085 PyObject_SelfIter, /* tp_iter */
5086 (iternextfunc)striter_next, /* tp_iternext */
5087 striter_methods, /* tp_methods */
5088 0,
5089};
5090
5091static PyObject *
5092str_iter(PyObject *seq)
5093{
5094 striterobject *it;
5095
5096 if (!PyString_Check(seq)) {
5097 PyErr_BadInternalCall();
5098 return NULL;
5099 }
5100 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5101 if (it == NULL)
5102 return NULL;
5103 it->it_index = 0;
5104 Py_INCREF(seq);
5105 it->it_seq = (PyStringObject *)seq;
5106 _PyObject_GC_TRACK(it);
5107 return (PyObject *)it;
5108}