blob: 699ae27739a4c7e4760cbb61b95c2e77f54aa27b [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +00008
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d94e2002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d94e2002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000176 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000245 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 n = (n*10) + *f++ - '0';
252 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000253 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000417 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000490 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000510 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000522 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000609 if (ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000613 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000614 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000615 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000622 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000623 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000624 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
630 }
631 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000632 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000633 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000634 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 }
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000645 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 default:
648 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000649 s--;
650 goto non_esc; /* an arbitry number of unescaped
651 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 }
653 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000654 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000655 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 return v;
657 failed:
658 Py_DECREF(v);
659 return NULL;
660}
661
Thomas Wouters477c8d52006-05-27 19:21:47 +0000662/* -------------------------------------------------------------------- */
663/* object api */
664
Martin v. Löwis18e16552006-02-15 17:27:45 +0000665static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000666string_getsize(register PyObject *op)
667{
668 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000669 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000670 if (PyString_AsStringAndSize(op, &s, &len))
671 return -1;
672 return len;
673}
674
675static /*const*/ char *
676string_getbuffer(register PyObject *op)
677{
678 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000679 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000680 if (PyString_AsStringAndSize(op, &s, &len))
681 return NULL;
682 return s;
683}
684
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000686PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000688 if (PyUnicode_Check(op)) {
689 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
690 if (!op)
691 return -1;
692 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000693 if (!PyString_Check(op))
694 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000695 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696}
697
698/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000699PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000701 if (PyUnicode_Check(op)) {
702 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
703 if (!op)
704 return NULL;
705 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000706 if (!PyString_Check(op))
707 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000708 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709}
710
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000711int
712PyString_AsStringAndSize(register PyObject *obj,
713 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000714 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000715{
716 if (s == NULL) {
717 PyErr_BadInternalCall();
718 return -1;
719 }
720
721 if (!PyString_Check(obj)) {
722 if (PyUnicode_Check(obj)) {
723 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
724 if (obj == NULL)
725 return -1;
726 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000727 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000728 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000730 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000731 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000732 return -1;
733 }
734 }
735
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000739 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
743 }
744 return 0;
745}
746
Thomas Wouters477c8d52006-05-27 19:21:47 +0000747/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748/* Methods */
749
Thomas Wouters477c8d52006-05-27 19:21:47 +0000750#define STRINGLIB_CHAR char
751
752#define STRINGLIB_CMP memcmp
753#define STRINGLIB_LEN PyString_GET_SIZE
754#define STRINGLIB_NEW PyString_FromStringAndSize
755#define STRINGLIB_STR PyString_AS_STRING
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000756#define STRINGLIB_WANT_CONTAINS_OBJ 1
Thomas Wouters477c8d52006-05-27 19:21:47 +0000757
758#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000759#define STRINGLIB_CHECK_EXACT PyString_CheckExact
760#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000761
762#include "stringlib/fastsearch.h"
763
764#include "stringlib/count.h"
765#include "stringlib/find.h"
766#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000767#include "stringlib/ctype.h"
768#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000769
770
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000771PyObject *
772PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000773{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000774 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000775 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis5b222132007-06-10 09:51:05 +0000776 Py_ssize_t length = PyString_GET_SIZE(op);
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000777 size_t newsize = 3 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000778 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000779 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000780 PyErr_SetString(PyExc_OverflowError,
781 "string is too large to make repr");
782 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000783 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000784 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000785 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000786 }
787 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000788 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000789 register Py_UNICODE c;
790 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000791 int quote;
792
Thomas Wouters7e474022000-07-16 12:04:32 +0000793 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000795 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000796 char *test, *start;
797 start = PyString_AS_STRING(op);
798 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000799 if (*test == '"') {
800 quote = '\''; /* switch back to single quote */
801 goto decided;
802 }
803 else if (*test == '\'')
804 quote = '"';
805 }
806 decided:
807 ;
808 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000809
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000810 *p++ = 's', *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000811 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000812 /* There's at least enough room for a hex escape
813 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000814 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000818 else if (c == '\t')
819 *p++ = '\\', *p++ = 't';
820 else if (c == '\n')
821 *p++ = '\\', *p++ = 'n';
822 else if (c == '\r')
823 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000824 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000825 *p++ = '\\';
826 *p++ = 'x';
827 *p++ = hexdigits[(c & 0xf0) >> 4];
828 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000829 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000830 else
831 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000833 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000834 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000836 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
837 Py_DECREF(v);
838 return NULL;
839 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000840 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000842}
843
Guido van Rossum189f1df2001-05-01 16:51:53 +0000844static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000845string_repr(PyObject *op)
846{
847 return PyString_Repr(op, 1);
848}
849
850static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000851string_str(PyObject *s)
852{
Tim Petersc9933152001-10-16 20:18:24 +0000853 assert(PyString_Check(s));
854 if (PyString_CheckExact(s)) {
855 Py_INCREF(s);
856 return s;
857 }
858 else {
859 /* Subtype -- return genuine string with the same value. */
860 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000861 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000862 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863}
864
Martin v. Löwis18e16552006-02-15 17:27:45 +0000865static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000866string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000867{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000868 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869}
870
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000871static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000872string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000874 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000875 register PyStringObject *op;
876 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000877 if (PyUnicode_Check(bb))
878 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000879 if (PyBytes_Check(bb))
880 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000881 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000882 "cannot concatenate 'str8' and '%.200s' objects",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000883 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884 return NULL;
885 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000886#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887 /* Optimize cases with empty left or right operand */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000888 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000889 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000890 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000891 Py_INCREF(bb);
892 return bb;
893 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 Py_INCREF(a);
895 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000897 size = Py_Size(a) + Py_Size(b);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000898 if (size < 0) {
899 PyErr_SetString(PyExc_OverflowError,
900 "strings are too large to concat");
901 return NULL;
902 }
903
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000904 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000905 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000906 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000908 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000909 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000910 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000911 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
912 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915#undef b
916}
917
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000918static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000919string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000921 register Py_ssize_t i;
922 register Py_ssize_t j;
923 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000924 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000925 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000926 if (n < 0)
927 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000928 /* watch out for overflows: the size can overflow int,
929 * and the # of bytes needed can overflow size_t
930 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000931 size = Py_Size(a) * n;
932 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000933 PyErr_SetString(PyExc_OverflowError,
934 "repeated string is too long");
935 return NULL;
936 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000937 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 Py_INCREF(a);
939 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 }
Tim Peterse7c05322004-06-27 17:24:49 +0000941 nbytes = (size_t)size;
942 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000943 PyErr_SetString(PyExc_OverflowError,
944 "repeated string is too long");
945 return NULL;
946 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000948 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000949 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000950 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000951 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000952 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000953 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000954 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000955 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000956 memset(op->ob_sval, a->ob_sval[0] , n);
957 return (PyObject *) op;
958 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000959 i = 0;
960 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000961 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
962 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000963 }
964 while (i < size) {
965 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000966 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000967 i += j;
968 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000970}
971
Guido van Rossum9284a572000-03-07 15:53:43 +0000972static int
Thomas Wouters477c8d52006-05-27 19:21:47 +0000973string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +0000974{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000975 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000976 if (PyUnicode_Check(sub_obj))
977 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000978 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000979 PyErr_Format(PyExc_TypeError,
980 "'in <string>' requires string as left operand, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000981 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000982 return -1;
983 }
Guido van Rossum9284a572000-03-07 15:53:43 +0000984 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000985
Thomas Wouters477c8d52006-05-27 19:21:47 +0000986 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +0000987}
988
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000990string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000991{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000992 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000993 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000994 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000995 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 return NULL;
997 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000998 pchar = a->ob_sval[i];
999 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001000 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001001 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001002 else {
1003#ifdef COUNT_ALLOCS
1004 one_strings++;
1005#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001006 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001007 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001008 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001009}
1010
Martin v. Löwiscd353062001-05-24 16:56:35 +00001011static PyObject*
1012string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001013{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001014 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001015 Py_ssize_t len_a, len_b;
1016 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001017 PyObject *result;
1018
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001019 /* Make sure both arguments are strings. */
1020 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001021 result = Py_NotImplemented;
1022 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001023 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001024 if (a == b) {
1025 switch (op) {
1026 case Py_EQ:case Py_LE:case Py_GE:
1027 result = Py_True;
1028 goto out;
1029 case Py_NE:case Py_LT:case Py_GT:
1030 result = Py_False;
1031 goto out;
1032 }
1033 }
1034 if (op == Py_EQ) {
1035 /* Supporting Py_NE here as well does not save
1036 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001037 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001038 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001039 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001040 result = Py_True;
1041 } else {
1042 result = Py_False;
1043 }
1044 goto out;
1045 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001046 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001047 min_len = (len_a < len_b) ? len_a : len_b;
1048 if (min_len > 0) {
1049 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1050 if (c==0)
1051 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001052 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001053 c = 0;
1054 if (c == 0)
1055 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1056 switch (op) {
1057 case Py_LT: c = c < 0; break;
1058 case Py_LE: c = c <= 0; break;
1059 case Py_EQ: assert(0); break; /* unreachable */
1060 case Py_NE: c = c != 0; break;
1061 case Py_GT: c = c > 0; break;
1062 case Py_GE: c = c >= 0; break;
1063 default:
1064 result = Py_NotImplemented;
1065 goto out;
1066 }
1067 result = c ? Py_True : Py_False;
1068 out:
1069 Py_INCREF(result);
1070 return result;
1071}
1072
1073int
1074_PyString_Eq(PyObject *o1, PyObject *o2)
1075{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001076 PyStringObject *a = (PyStringObject*) o1;
1077 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001078 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001079 && *a->ob_sval == *b->ob_sval
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001080 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001081}
1082
Guido van Rossum9bfef441993-03-29 10:43:31 +00001083static long
Fred Drakeba096332000-07-09 07:04:36 +00001084string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001085{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001086 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001087 register unsigned char *p;
1088 register long x;
1089
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001090 if (a->ob_shash != -1)
1091 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001092 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001093 p = (unsigned char *) a->ob_sval;
1094 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001095 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001096 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001097 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001098 if (x == -1)
1099 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001100 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001101 return x;
1102}
1103
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001104static PyObject*
1105string_subscript(PyStringObject* self, PyObject* item)
1106{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001107 if (PyIndex_Check(item)) {
1108 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001109 if (i == -1 && PyErr_Occurred())
1110 return NULL;
1111 if (i < 0)
1112 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001113 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001114 }
1115 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001116 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001117 char* source_buf;
1118 char* result_buf;
1119 PyObject* result;
1120
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001121 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001122 PyString_GET_SIZE(self),
1123 &start, &stop, &step, &slicelength) < 0) {
1124 return NULL;
1125 }
1126
1127 if (slicelength <= 0) {
1128 return PyString_FromStringAndSize("", 0);
1129 }
Thomas Woutersed03b412007-08-28 21:37:11 +00001130 else if (start == 0 && step == 1 &&
1131 slicelength == PyString_GET_SIZE(self) &&
1132 PyString_CheckExact(self)) {
1133 Py_INCREF(self);
1134 return (PyObject *)self;
1135 }
1136 else if (step == 1) {
1137 return PyString_FromStringAndSize(
1138 PyString_AS_STRING(self) + start,
1139 slicelength);
1140 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001141 else {
1142 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001143 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001144 if (result_buf == NULL)
1145 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001146
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001147 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001148 cur += step, i++) {
1149 result_buf[i] = source_buf[cur];
1150 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001151
1152 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001153 slicelength);
1154 PyMem_Free(result_buf);
1155 return result;
1156 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001157 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001158 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001159 PyErr_Format(PyExc_TypeError,
1160 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001161 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001162 return NULL;
1163 }
1164}
1165
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001166static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001167string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001168{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001169 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +00001170}
1171
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001172static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001173 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001174 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001175 (ssizeargfunc)string_repeat, /*sq_repeat*/
1176 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +00001177 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001178 0, /*sq_ass_item*/
1179 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001180 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001181};
1182
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001183static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001184 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001185 (binaryfunc)string_subscript,
1186 0,
1187};
1188
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001189static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001190 (getbufferproc)string_buffer_getbuffer,
1191 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001192};
1193
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001194
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for a strip mode: the format string with its leading
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001203
Thomas Wouters477c8d52006-05-27 19:21:47 +00001204
/* True when `length` bytes of `target`, starting at `offset`, equal
   `pattern`.  The first and last bytes are compared directly and only the
   interior goes through memcmp().

   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)    \
    (target[offset] == pattern[0] &&                        \
     target[offset+length-1] == pattern[length-1] &&        \
     !memcmp(target+offset+1, pattern+1, length-2) )
1210
1211
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16. Most observed string splits are for
   human text (roughly 11 words per line) and field delimited data (usually
   1-10 fields). For large strings the split algorithms are bandwidth
   limited so increasing the preallocation likely will not improve
   things. */

#define MAX_PREALLOC 12

/* Initial list size for a given split limit, capped at MAX_PREALLOC.
   5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
1224
/* Append the substring data[left:right] to the result list.

   Relies on names in the expanding function: `str` (scratch PyObject *),
   `list` (the result list), `count` (items stored so far) and an
   `onError` label that is jumped to on failure.  The first MAX_PREALLOC
   items go straight into the preallocated slots; later ones are appended,
   after which the local reference is dropped. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        int append_failed = PyList_Append(list, str);           \
        Py_DECREF(str);                                         \
        if (append_failed)                                      \
            goto onError;                                       \
    } else {                                                    \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001241
/* Always force the list to the expected size: set the list's size field
   to the number of items actually stored (`count` in the expanding
   function). */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Thomas Wouters477c8d52006-05-27 19:21:47 +00001244
/* Cursor helpers for the whitespace splitters.  Each one moves the index
   `i` (modified in place) over a run of characters of s: the forward
   variants stop at `len`, the reverse variants stop once i drops below 0. */
#define SKIP_SPACE(s, i, len)    { while (i<len && ISSPACE(s[i])) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !ISSPACE(s[i])) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && ISSPACE(s[i])) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !ISSPACE(s[i])) i--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001249
1250Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001251split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001252{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001253 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001254 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001255 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001256
1257 if (list == NULL)
1258 return NULL;
1259
Thomas Wouters477c8d52006-05-27 19:21:47 +00001260 i = j = 0;
1261
1262 while (maxsplit-- > 0) {
1263 SKIP_SPACE(s, i, len);
1264 if (i==len) break;
1265 j = i; i++;
1266 SKIP_NONSPACE(s, i, len);
1267 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001268 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001269
1270 if (i < len) {
1271 /* Only occurs when maxsplit was reached */
1272 /* Skip any remaining whitespace and copy to end of string */
1273 SKIP_SPACE(s, i, len);
1274 if (i != len)
1275 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001276 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001279 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001280 Py_DECREF(list);
1281 return NULL;
1282}
1283
Thomas Wouters477c8d52006-05-27 19:21:47 +00001284Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001285split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001286{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001287 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001288 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001289 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001290
1291 if (list == NULL)
1292 return NULL;
1293
Thomas Wouters477c8d52006-05-27 19:21:47 +00001294 i = j = 0;
1295 while ((j < len) && (maxcount-- > 0)) {
1296 for(; j<len; j++) {
1297 /* I found that using memchr makes no difference */
1298 if (s[j] == ch) {
1299 SPLIT_ADD(s, i, j);
1300 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001301 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001302 }
1303 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001304 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001305 if (i <= len) {
1306 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001307 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001308 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001309 return list;
1310
1311 onError:
1312 Py_DECREF(list);
1313 return NULL;
1314}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001316PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317"S.split([sep [,maxsplit]]) -> list of strings\n\
1318\n\
1319Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001320delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001321splits are done. If sep is not specified or is None, any\n\
1322whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323
1324static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001325string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001326{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001327 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001328 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001329 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001330 PyObject *list, *str, *subobj = Py_None;
1331#ifdef USE_FAST
1332 Py_ssize_t pos;
1333#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001335 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001337 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001338 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001339 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001340 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001341 if (PyString_Check(subobj)) {
1342 sub = PyString_AS_STRING(subobj);
1343 n = PyString_GET_SIZE(subobj);
1344 }
1345 else if (PyUnicode_Check(subobj))
1346 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1347 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1348 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001349
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350 if (n == 0) {
1351 PyErr_SetString(PyExc_ValueError, "empty separator");
1352 return NULL;
1353 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001354 else if (n == 1)
1355 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001356
Thomas Wouters477c8d52006-05-27 19:21:47 +00001357 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 if (list == NULL)
1359 return NULL;
1360
Thomas Wouters477c8d52006-05-27 19:21:47 +00001361#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001363 while (maxsplit-- > 0) {
1364 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1365 if (pos < 0)
1366 break;
1367 j = i+pos;
1368 SPLIT_ADD(s, i, j);
1369 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001370 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001371#else
1372 i = j = 0;
1373 while ((j+n <= len) && (maxsplit-- > 0)) {
1374 for (; j+n <= len; j++) {
1375 if (Py_STRING_MATCH(s, j, sub, n)) {
1376 SPLIT_ADD(s, i, j);
1377 i = j = j + n;
1378 break;
1379 }
1380 }
1381 }
1382#endif
1383 SPLIT_ADD(s, i, len);
1384 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385 return list;
1386
Thomas Wouters477c8d52006-05-27 19:21:47 +00001387 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001388 Py_DECREF(list);
1389 return NULL;
1390}
1391
Thomas Wouters477c8d52006-05-27 19:21:47 +00001392PyDoc_STRVAR(partition__doc__,
1393"S.partition(sep) -> (head, sep, tail)\n\
1394\n\
1395Searches for the separator sep in S, and returns the part before it,\n\
1396the separator itself, and the part after it. If the separator is not\n\
1397found, returns S and two empty strings.");
1398
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001399static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001400string_partition(PyStringObject *self, PyObject *sep_obj)
1401{
1402 const char *sep;
1403 Py_ssize_t sep_len;
1404
1405 if (PyString_Check(sep_obj)) {
1406 sep = PyString_AS_STRING(sep_obj);
1407 sep_len = PyString_GET_SIZE(sep_obj);
1408 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001409 else if (PyUnicode_Check(sep_obj))
1410 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001411 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1412 return NULL;
1413
1414 return stringlib_partition(
1415 (PyObject*) self,
1416 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1417 sep_obj, sep, sep_len
1418 );
1419}
1420
1421PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001422"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001423\n\
1424Searches for the separator sep in S, starting at the end of S, and returns\n\
1425the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001426separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001427
1428static PyObject *
1429string_rpartition(PyStringObject *self, PyObject *sep_obj)
1430{
1431 const char *sep;
1432 Py_ssize_t sep_len;
1433
1434 if (PyString_Check(sep_obj)) {
1435 sep = PyString_AS_STRING(sep_obj);
1436 sep_len = PyString_GET_SIZE(sep_obj);
1437 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001438 else if (PyUnicode_Check(sep_obj))
1439 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001440 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1441 return NULL;
1442
1443 return stringlib_rpartition(
1444 (PyObject*) self,
1445 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1446 sep_obj, sep, sep_len
1447 );
1448}
1449
1450Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001451rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001452{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001453 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001454 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001455 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001456
1457 if (list == NULL)
1458 return NULL;
1459
Thomas Wouters477c8d52006-05-27 19:21:47 +00001460 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001461
Thomas Wouters477c8d52006-05-27 19:21:47 +00001462 while (maxsplit-- > 0) {
1463 RSKIP_SPACE(s, i);
1464 if (i<0) break;
1465 j = i; i--;
1466 RSKIP_NONSPACE(s, i);
1467 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001468 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001469 if (i >= 0) {
1470 /* Only occurs when maxsplit was reached */
1471 /* Skip any remaining whitespace and copy to beginning of string */
1472 RSKIP_SPACE(s, i);
1473 if (i >= 0)
1474 SPLIT_ADD(s, 0, i + 1);
1475
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001476 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001477 FIX_PREALLOC_SIZE(list);
1478 if (PyList_Reverse(list) < 0)
1479 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001480 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001481 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001482 Py_DECREF(list);
1483 return NULL;
1484}
1485
Thomas Wouters477c8d52006-05-27 19:21:47 +00001486Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001487rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001488{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001489 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001490 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001491 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001492
1493 if (list == NULL)
1494 return NULL;
1495
Thomas Wouters477c8d52006-05-27 19:21:47 +00001496 i = j = len - 1;
1497 while ((i >= 0) && (maxcount-- > 0)) {
1498 for (; i >= 0; i--) {
1499 if (s[i] == ch) {
1500 SPLIT_ADD(s, i + 1, j + 1);
1501 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001502 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001503 }
1504 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001505 }
1506 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001507 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001508 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001509 FIX_PREALLOC_SIZE(list);
1510 if (PyList_Reverse(list) < 0)
1511 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001512 return list;
1513
1514 onError:
1515 Py_DECREF(list);
1516 return NULL;
1517}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001518
1519PyDoc_STRVAR(rsplit__doc__,
1520"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1521\n\
1522Return a list of the words in the string S, using sep as the\n\
1523delimiter string, starting at the end of the string and working\n\
1524to the front. If maxsplit is given, at most maxsplit splits are\n\
1525done. If sep is not specified or is None, any whitespace string\n\
1526is a separator.");
1527
1528static PyObject *
1529string_rsplit(PyStringObject *self, PyObject *args)
1530{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001531 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001532 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001533 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001534 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001535
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001536 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001537 return NULL;
1538 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001539 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001540 if (subobj == Py_None)
1541 return rsplit_whitespace(s, len, maxsplit);
1542 if (PyString_Check(subobj)) {
1543 sub = PyString_AS_STRING(subobj);
1544 n = PyString_GET_SIZE(subobj);
1545 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001546 else if (PyUnicode_Check(subobj))
1547 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001548 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1549 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001550
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001551 if (n == 0) {
1552 PyErr_SetString(PyExc_ValueError, "empty separator");
1553 return NULL;
1554 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001555 else if (n == 1)
1556 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001557
Thomas Wouters477c8d52006-05-27 19:21:47 +00001558 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001559 if (list == NULL)
1560 return NULL;
1561
1562 j = len;
1563 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001564
Thomas Wouters477c8d52006-05-27 19:21:47 +00001565 while ( (i >= 0) && (maxsplit-- > 0) ) {
1566 for (; i>=0; i--) {
1567 if (Py_STRING_MATCH(s, i, sub, n)) {
1568 SPLIT_ADD(s, i + n, j);
1569 j = i;
1570 i -= n;
1571 break;
1572 }
1573 }
1574 }
1575 SPLIT_ADD(s, 0, j);
1576 FIX_PREALLOC_SIZE(list);
1577 if (PyList_Reverse(list) < 0)
1578 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001579 return list;
1580
Thomas Wouters477c8d52006-05-27 19:21:47 +00001581onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001582 Py_DECREF(list);
1583 return NULL;
1584}
1585
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001586#undef SPLIT_ADD
1587#undef MAX_PREALLOC
1588#undef PREALLOC_SIZE
1589
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592"S.join(sequence) -> string\n\
1593\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001594Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001595sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596
1597static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001598string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599{
1600 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001601 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001604 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001605 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001606 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001607 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608
Tim Peters19fe14e2001-01-19 03:03:47 +00001609 seq = PySequence_Fast(orig, "");
1610 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001611 return NULL;
1612 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001613
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001614 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001615 if (seqlen == 0) {
1616 Py_DECREF(seq);
1617 return PyString_FromString("");
1618 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001620 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001621 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1622 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001623 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001624 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001625 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001627
Raymond Hettinger674f2412004-08-23 23:23:54 +00001628 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001629 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001630 * Do a pre-pass to figure out the total amount of space we'll
1631 * need (sz), see whether any argument is absurd, and defer to
1632 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001633 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001634 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001635 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001636 item = PySequence_Fast_GET_ITEM(seq, i);
1637 if (!PyString_Check(item)){
1638 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001639 /* Defer to Unicode join.
1640 * CAUTION: There's no gurantee that the
1641 * original sequence can be iterated over
1642 * again, so we must pass seq here.
1643 */
1644 PyObject *result;
1645 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001646 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001647 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001648 }
1649 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001650 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001651 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001652 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001653 Py_DECREF(seq);
1654 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001655 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001656 sz += PyString_GET_SIZE(item);
1657 if (i != 0)
1658 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001659 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001660 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001661 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001662 Py_DECREF(seq);
1663 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001664 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001665 }
1666
1667 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001668 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001669 if (res == NULL) {
1670 Py_DECREF(seq);
1671 return NULL;
1672 }
1673
1674 /* Catenate everything. */
1675 p = PyString_AS_STRING(res);
1676 for (i = 0; i < seqlen; ++i) {
1677 size_t n;
1678 item = PySequence_Fast_GET_ITEM(seq, i);
1679 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001680 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001681 p += n;
1682 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001683 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001684 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001685 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001686 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001687
Jeremy Hylton49048292000-07-11 03:28:17 +00001688 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001689 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690}
1691
Tim Peters52e155e2001-06-16 05:42:57 +00001692PyObject *
1693_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001694{
Tim Petersa7259592001-06-16 05:11:17 +00001695 assert(sep != NULL && PyString_Check(sep));
1696 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001697 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001698}
1699
Thomas Wouters477c8d52006-05-27 19:21:47 +00001700Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001701string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001702{
1703 if (*end > len)
1704 *end = len;
1705 else if (*end < 0)
1706 *end += len;
1707 if (*end < 0)
1708 *end = 0;
1709 if (*start < 0)
1710 *start += len;
1711 if (*start < 0)
1712 *start = 0;
1713}
1714
Thomas Wouters477c8d52006-05-27 19:21:47 +00001715Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001716string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001718 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001719 const char *sub;
1720 Py_ssize_t sub_len;
1721 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722
Thomas Wouters477c8d52006-05-27 19:21:47 +00001723 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1724 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001725 return -2;
1726 if (PyString_Check(subobj)) {
1727 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001728 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001729 }
1730 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001731 return PyUnicode_Find(
1732 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001733 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001734 /* XXX - the "expected a character buffer object" is pretty
1735 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736 return -2;
1737
Thomas Wouters477c8d52006-05-27 19:21:47 +00001738 if (dir > 0)
1739 return stringlib_find_slice(
1740 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1741 sub, sub_len, start, end);
1742 else
1743 return stringlib_rfind_slice(
1744 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1745 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746}
1747
1748
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001749PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750"S.find(sub [,start [,end]]) -> int\n\
1751\n\
1752Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001753such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754arguments start and end are interpreted as in slice notation.\n\
1755\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001756Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757
1758static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001759string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001761 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762 if (result == -2)
1763 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001764 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765}
1766
1767
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001768PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001769"S.index(sub [,start [,end]]) -> int\n\
1770\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001771Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772
1773static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001774string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001776 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777 if (result == -2)
1778 return NULL;
1779 if (result == -1) {
1780 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001781 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782 return NULL;
1783 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001784 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785}
1786
1787
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001788PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789"S.rfind(sub [,start [,end]]) -> int\n\
1790\n\
1791Return the highest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001792such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793arguments start and end are interpreted as in slice notation.\n\
1794\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796
1797static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001798string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001800 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801 if (result == -2)
1802 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001803 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804}
1805
1806
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001807PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808"S.rindex(sub [,start [,end]]) -> int\n\
1809\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001810Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811
1812static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001813string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001815 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816 if (result == -2)
1817 return NULL;
1818 if (result == -1) {
1819 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001820 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821 return NULL;
1822 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001823 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824}
1825
1826
Thomas Wouters477c8d52006-05-27 19:21:47 +00001827Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001828do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1829{
1830 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001831 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001832 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001833 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1834 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001835
1836 i = 0;
1837 if (striptype != RIGHTSTRIP) {
1838 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1839 i++;
1840 }
1841 }
1842
1843 j = len;
1844 if (striptype != LEFTSTRIP) {
1845 do {
1846 j--;
1847 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1848 j++;
1849 }
1850
1851 if (i == 0 && j == len && PyString_CheckExact(self)) {
1852 Py_INCREF(self);
1853 return (PyObject*)self;
1854 }
1855 else
1856 return PyString_FromStringAndSize(s+i, j-i);
1857}
1858
1859
Thomas Wouters477c8d52006-05-27 19:21:47 +00001860Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001861do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001862{
1863 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001864 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866 i = 0;
1867 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001868 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001869 i++;
1870 }
1871 }
1872
1873 j = len;
1874 if (striptype != LEFTSTRIP) {
1875 do {
1876 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001877 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878 j++;
1879 }
1880
Tim Peters8fa5dd02001-09-12 02:18:30 +00001881 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882 Py_INCREF(self);
1883 return (PyObject*)self;
1884 }
1885 else
1886 return PyString_FromStringAndSize(s+i, j-i);
1887}
1888
1889
Thomas Wouters477c8d52006-05-27 19:21:47 +00001890Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001891do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1892{
1893 PyObject *sep = NULL;
1894
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001895 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001896 return NULL;
1897
1898 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001899 if (PyString_Check(sep))
1900 return do_xstrip(self, striptype, sep);
1901 else if (PyUnicode_Check(sep)) {
1902 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1903 PyObject *res;
1904 if (uniself==NULL)
1905 return NULL;
1906 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1907 striptype, sep);
1908 Py_DECREF(uniself);
1909 return res;
1910 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001911 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001912 "%s arg must be None or string",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001913 STRIPNAME(striptype));
1914 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001915 }
1916
1917 return do_strip(self, striptype);
1918}
1919
1920
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001921PyDoc_STRVAR(strip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001922"S.strip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923\n\
1924Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001925whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001926If chars is given and not None, remove characters in chars instead.\n\
1927If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928
1929static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001930string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001932 if (PyTuple_GET_SIZE(args) == 0)
1933 return do_strip(self, BOTHSTRIP); /* Common case */
1934 else
1935 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936}
1937
1938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001939PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001940"S.lstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001942Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001943If chars is given and not None, remove characters in chars instead.\n\
1944If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945
1946static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001947string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001949 if (PyTuple_GET_SIZE(args) == 0)
1950 return do_strip(self, LEFTSTRIP); /* Common case */
1951 else
1952 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953}
1954
1955
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001956PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001957"S.rstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001959Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001960If chars is given and not None, remove characters in chars instead.\n\
1961If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962
1963static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001964string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001966 if (PyTuple_GET_SIZE(args) == 0)
1967 return do_strip(self, RIGHTSTRIP); /* Common case */
1968 else
1969 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970}
1971
1972
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001973PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974"S.count(sub[, start[, end]]) -> int\n\
1975\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001976Return the number of non-overlapping occurrences of substring sub in\n\
1977string S[start:end]. Optional arguments start and end are interpreted\n\
1978as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979
1980static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001981string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001983 PyObject *sub_obj;
1984 const char *str = PyString_AS_STRING(self), *sub;
1985 Py_ssize_t sub_len;
1986 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987
Thomas Wouters477c8d52006-05-27 19:21:47 +00001988 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1989 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001991
Thomas Wouters477c8d52006-05-27 19:21:47 +00001992 if (PyString_Check(sub_obj)) {
1993 sub = PyString_AS_STRING(sub_obj);
1994 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001995 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001996 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001997 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001998 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001999 if (count == -1)
2000 return NULL;
2001 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002002 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002003 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002004 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002005 return NULL;
2006
Thomas Wouters477c8d52006-05-27 19:21:47 +00002007 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002008
Thomas Wouters477c8d52006-05-27 19:21:47 +00002009 return PyInt_FromSsize_t(
2010 stringlib_count(str + start, end - start, sub, sub_len)
2011 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012}
2013
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002015PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016"S.translate(table [,deletechars]) -> string\n\
2017\n\
2018Return a copy of the string S, where all characters occurring\n\
2019in the optional argument deletechars are removed, and the\n\
2020remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002021translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022
2023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002024string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002026 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002027 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002028 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002030 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002031 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032 PyObject *result;
2033 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002034 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002036 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002037 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002039
2040 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002041 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002042 tablen = PyString_GET_SIZE(tableobj);
2043 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002044 else if (tableobj == Py_None) {
2045 table = NULL;
2046 tablen = 256;
2047 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002048 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002049 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002050 parameter; instead a mapping to None will cause characters
2051 to be deleted. */
2052 if (delobj != NULL) {
2053 PyErr_SetString(PyExc_TypeError,
2054 "deletions are implemented differently for unicode");
2055 return NULL;
2056 }
2057 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2058 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002059 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002060 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061
Martin v. Löwis00b61272002-12-12 20:03:19 +00002062 if (tablen != 256) {
2063 PyErr_SetString(PyExc_ValueError,
2064 "translation table must be 256 characters long");
2065 return NULL;
2066 }
2067
Guido van Rossum4c08d552000-03-10 22:55:18 +00002068 if (delobj != NULL) {
2069 if (PyString_Check(delobj)) {
2070 del_table = PyString_AS_STRING(delobj);
2071 dellen = PyString_GET_SIZE(delobj);
2072 }
2073 else if (PyUnicode_Check(delobj)) {
2074 PyErr_SetString(PyExc_TypeError,
2075 "deletions are implemented differently for unicode");
2076 return NULL;
2077 }
2078 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2079 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002080 }
2081 else {
2082 del_table = NULL;
2083 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084 }
2085
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002086 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002087 result = PyString_FromStringAndSize((char *)NULL, inlen);
2088 if (result == NULL)
2089 return NULL;
2090 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002091 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092
Guido van Rossumd8faa362007-04-27 19:54:29 +00002093 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094 /* If no deletions are required, use faster code */
2095 for (i = inlen; --i >= 0; ) {
2096 c = Py_CHARMASK(*input++);
2097 if (Py_CHARMASK((*output++ = table[c])) != c)
2098 changed = 1;
2099 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002100 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101 return result;
2102 Py_DECREF(result);
2103 Py_INCREF(input_obj);
2104 return input_obj;
2105 }
2106
Guido van Rossumd8faa362007-04-27 19:54:29 +00002107 if (table == NULL) {
2108 for (i = 0; i < 256; i++)
2109 trans_table[i] = Py_CHARMASK(i);
2110 } else {
2111 for (i = 0; i < 256; i++)
2112 trans_table[i] = Py_CHARMASK(table[i]);
2113 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114
2115 for (i = 0; i < dellen; i++)
2116 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2117
2118 for (i = inlen; --i >= 0; ) {
2119 c = Py_CHARMASK(*input++);
2120 if (trans_table[c] != -1)
2121 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2122 continue;
2123 changed = 1;
2124 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002125 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126 Py_DECREF(result);
2127 Py_INCREF(input_obj);
2128 return input_obj;
2129 }
2130 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002131 if (inlen > 0)
2132 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133 return result;
2134}
2135
2136
/* Search directions for findstring()/countstring().  REVERSE is
   parenthesized so the negative literal cannot bind to a neighbouring
   operator at the point of expansion. */
#define FORWARD 1
#define REVERSE (-1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139
Thomas Wouters477c8d52006-05-27 19:21:47 +00002140/* find and count characters and substrings */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141
/* Locate the first byte equal to c within the first target_len bytes of
   target; evaluates to a char * to that byte, or NULL if there is none.
   Every argument is parenthesized so expressions can be passed safely. */
#define findchar(target, target_len, c)				\
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002144
Thomas Wouters477c8d52006-05-27 19:21:47 +00002145/* String ops must return a string. */
2146/* If the object is subclass of string, create a copy */
2147Py_LOCAL(PyStringObject *)
2148return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002150 if (PyString_CheckExact(self)) {
2151 Py_INCREF(self);
2152 return self;
2153 }
2154 return (PyStringObject *)PyString_FromStringAndSize(
2155 PyString_AS_STRING(self),
2156 PyString_GET_SIZE(self));
2157}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158
Thomas Wouters477c8d52006-05-27 19:21:47 +00002159Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002160countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002161{
2162 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002163 const char *start=target;
2164 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165
Thomas Wouters477c8d52006-05-27 19:21:47 +00002166 while ( (start=findchar(start, end-start, c)) != NULL ) {
2167 count++;
2168 if (count >= maxcount)
2169 break;
2170 start += 1;
2171 }
2172 return count;
2173}
2174
2175Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002176findstring(const char *target, Py_ssize_t target_len,
2177 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002178 Py_ssize_t start,
2179 Py_ssize_t end,
2180 int direction)
2181{
2182 if (start < 0) {
2183 start += target_len;
2184 if (start < 0)
2185 start = 0;
2186 }
2187 if (end > target_len) {
2188 end = target_len;
2189 } else if (end < 0) {
2190 end += target_len;
2191 if (end < 0)
2192 end = 0;
2193 }
2194
2195 /* zero-length substrings always match at the first attempt */
2196 if (pattern_len == 0)
2197 return (direction > 0) ? start : end;
2198
2199 end -= pattern_len;
2200
2201 if (direction < 0) {
2202 for (; end >= start; end--)
2203 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2204 return end;
2205 } else {
2206 for (; start <= end; start++)
2207 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2208 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209 }
2210 return -1;
2211}
2212
Thomas Wouters477c8d52006-05-27 19:21:47 +00002213Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002214countstring(const char *target, Py_ssize_t target_len,
2215 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002216 Py_ssize_t start,
2217 Py_ssize_t end,
2218 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002220 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221
Thomas Wouters477c8d52006-05-27 19:21:47 +00002222 if (start < 0) {
2223 start += target_len;
2224 if (start < 0)
2225 start = 0;
2226 }
2227 if (end > target_len) {
2228 end = target_len;
2229 } else if (end < 0) {
2230 end += target_len;
2231 if (end < 0)
2232 end = 0;
2233 }
2234
2235 /* zero-length substrings match everywhere */
2236 if (pattern_len == 0 || maxcount == 0) {
2237 if (target_len+1 < maxcount)
2238 return target_len+1;
2239 return maxcount;
2240 }
2241
2242 end -= pattern_len;
2243 if (direction < 0) {
2244 for (; (end >= start); end--)
2245 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2246 count++;
2247 if (--maxcount <= 0) break;
2248 end -= pattern_len-1;
2249 }
2250 } else {
2251 for (; (start <= end); start++)
2252 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2253 count++;
2254 if (--maxcount <= 0)
2255 break;
2256 start += pattern_len-1;
2257 }
2258 }
2259 return count;
2260}
2261
2262
2263/* Algorithms for different cases of string replacement */
2264
2265/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2266Py_LOCAL(PyStringObject *)
2267replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002268 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002269 Py_ssize_t maxcount)
2270{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002271 char *self_s, *result_s;
2272 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002273 Py_ssize_t count, i, product;
2274 PyStringObject *result;
2275
2276 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002277
Thomas Wouters477c8d52006-05-27 19:21:47 +00002278 /* 1 at the end plus 1 after every character */
2279 count = self_len+1;
2280 if (maxcount < count)
2281 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002282
Thomas Wouters477c8d52006-05-27 19:21:47 +00002283 /* Check for overflow */
2284 /* result_len = count * to_len + self_len; */
2285 product = count * to_len;
2286 if (product / to_len != count) {
2287 PyErr_SetString(PyExc_OverflowError,
2288 "replace string is too long");
2289 return NULL;
2290 }
2291 result_len = product + self_len;
2292 if (result_len < 0) {
2293 PyErr_SetString(PyExc_OverflowError,
2294 "replace string is too long");
2295 return NULL;
2296 }
2297
2298 if (! (result = (PyStringObject *)
2299 PyString_FromStringAndSize(NULL, result_len)) )
2300 return NULL;
2301
2302 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002303 result_s = PyString_AS_STRING(result);
2304
2305 /* TODO: special case single character, which doesn't need memcpy */
2306
2307 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002308 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002309 result_s += to_len;
2310 count -= 1;
2311
2312 for (i=0; i<count; i++) {
2313 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002314 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002315 result_s += to_len;
2316 }
2317
2318 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002319 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002320
2321 return result;
2322}
2323
2324/* Special case for deleting a single character */
2325/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2326Py_LOCAL(PyStringObject *)
2327replace_delete_single_character(PyStringObject *self,
2328 char from_c, Py_ssize_t maxcount)
2329{
2330 char *self_s, *result_s;
2331 char *start, *next, *end;
2332 Py_ssize_t self_len, result_len;
2333 Py_ssize_t count;
2334 PyStringObject *result;
2335
2336 self_len = PyString_GET_SIZE(self);
2337 self_s = PyString_AS_STRING(self);
2338
2339 count = countchar(self_s, self_len, from_c, maxcount);
2340 if (count == 0) {
2341 return return_self(self);
2342 }
2343
2344 result_len = self_len - count; /* from_len == 1 */
2345 assert(result_len>=0);
2346
2347 if ( (result = (PyStringObject *)
2348 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2349 return NULL;
2350 result_s = PyString_AS_STRING(result);
2351
2352 start = self_s;
2353 end = self_s + self_len;
2354 while (count-- > 0) {
2355 next = findchar(start, end-start, from_c);
2356 if (next == NULL)
2357 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002358 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002359 result_s += (next-start);
2360 start = next+1;
2361 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002362 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002363
Thomas Wouters477c8d52006-05-27 19:21:47 +00002364 return result;
2365}
2366
2367/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2368
2369Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002370replace_delete_substring(PyStringObject *self,
2371 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002372 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002373 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002374 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002375 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002376 Py_ssize_t count, offset;
2377 PyStringObject *result;
2378
2379 self_len = PyString_GET_SIZE(self);
2380 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002381
2382 count = countstring(self_s, self_len,
2383 from_s, from_len,
2384 0, self_len, 1,
2385 maxcount);
2386
2387 if (count == 0) {
2388 /* no matches */
2389 return return_self(self);
2390 }
2391
2392 result_len = self_len - (count * from_len);
2393 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002394
Thomas Wouters477c8d52006-05-27 19:21:47 +00002395 if ( (result = (PyStringObject *)
2396 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2397 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002398
Thomas Wouters477c8d52006-05-27 19:21:47 +00002399 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002400
Thomas Wouters477c8d52006-05-27 19:21:47 +00002401 start = self_s;
2402 end = self_s + self_len;
2403 while (count-- > 0) {
2404 offset = findstring(start, end-start,
2405 from_s, from_len,
2406 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002407 if (offset == -1)
2408 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002409 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002410
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002411 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002412
Thomas Wouters477c8d52006-05-27 19:21:47 +00002413 result_s += (next-start);
2414 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002416 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002417 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418}
2419
Thomas Wouters477c8d52006-05-27 19:21:47 +00002420/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2421Py_LOCAL(PyStringObject *)
2422replace_single_character_in_place(PyStringObject *self,
2423 char from_c, char to_c,
2424 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002426 char *self_s, *result_s, *start, *end, *next;
2427 Py_ssize_t self_len;
2428 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002429
Thomas Wouters477c8d52006-05-27 19:21:47 +00002430 /* The result string will be the same size */
2431 self_s = PyString_AS_STRING(self);
2432 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002433
Thomas Wouters477c8d52006-05-27 19:21:47 +00002434 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002435
Thomas Wouters477c8d52006-05-27 19:21:47 +00002436 if (next == NULL) {
2437 /* No matches; return the original string */
2438 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002439 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002440
Thomas Wouters477c8d52006-05-27 19:21:47 +00002441 /* Need to make a new string */
2442 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2443 if (result == NULL)
2444 return NULL;
2445 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002446 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002447
Thomas Wouters477c8d52006-05-27 19:21:47 +00002448 /* change everything in-place, starting with this one */
2449 start = result_s + (next-self_s);
2450 *start = to_c;
2451 start++;
2452 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002453
Thomas Wouters477c8d52006-05-27 19:21:47 +00002454 while (--maxcount > 0) {
2455 next = findchar(start, end-start, from_c);
2456 if (next == NULL)
2457 break;
2458 *next = to_c;
2459 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002460 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002461
Thomas Wouters477c8d52006-05-27 19:21:47 +00002462 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463}
2464
Thomas Wouters477c8d52006-05-27 19:21:47 +00002465/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2466Py_LOCAL(PyStringObject *)
2467replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002468 const char *from_s, Py_ssize_t from_len,
2469 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002470 Py_ssize_t maxcount)
2471{
2472 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002473 char *self_s;
2474 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002475 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002476
Thomas Wouters477c8d52006-05-27 19:21:47 +00002477 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002478
Thomas Wouters477c8d52006-05-27 19:21:47 +00002479 self_s = PyString_AS_STRING(self);
2480 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002481
Thomas Wouters477c8d52006-05-27 19:21:47 +00002482 offset = findstring(self_s, self_len,
2483 from_s, from_len,
2484 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002485 if (offset == -1) {
2486 /* No matches; return the original string */
2487 return return_self(self);
2488 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002489
Thomas Wouters477c8d52006-05-27 19:21:47 +00002490 /* Need to make a new string */
2491 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2492 if (result == NULL)
2493 return NULL;
2494 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002495 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002496
Thomas Wouters477c8d52006-05-27 19:21:47 +00002497 /* change everything in-place, starting with this one */
2498 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002499 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002500 start += from_len;
2501 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002502
Thomas Wouters477c8d52006-05-27 19:21:47 +00002503 while ( --maxcount > 0) {
2504 offset = findstring(start, end-start,
2505 from_s, from_len,
2506 0, end-start, FORWARD);
2507 if (offset==-1)
2508 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002509 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002510 start += offset+from_len;
2511 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002512
Thomas Wouters477c8d52006-05-27 19:21:47 +00002513 return result;
2514}
2515
2516/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2517Py_LOCAL(PyStringObject *)
2518replace_single_character(PyStringObject *self,
2519 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002520 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002521 Py_ssize_t maxcount)
2522{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002523 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002524 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002525 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002526 Py_ssize_t count, product;
2527 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002528
Thomas Wouters477c8d52006-05-27 19:21:47 +00002529 self_s = PyString_AS_STRING(self);
2530 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002531
Thomas Wouters477c8d52006-05-27 19:21:47 +00002532 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002533 if (count == 0) {
2534 /* no matches, return unchanged */
2535 return return_self(self);
2536 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002537
Thomas Wouters477c8d52006-05-27 19:21:47 +00002538 /* use the difference between current and new, hence the "-1" */
2539 /* result_len = self_len + count * (to_len-1) */
2540 product = count * (to_len-1);
2541 if (product / (to_len-1) != count) {
2542 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2543 return NULL;
2544 }
2545 result_len = self_len + product;
2546 if (result_len < 0) {
2547 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2548 return NULL;
2549 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002550
Thomas Wouters477c8d52006-05-27 19:21:47 +00002551 if ( (result = (PyStringObject *)
2552 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2553 return NULL;
2554 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002555
Thomas Wouters477c8d52006-05-27 19:21:47 +00002556 start = self_s;
2557 end = self_s + self_len;
2558 while (count-- > 0) {
2559 next = findchar(start, end-start, from_c);
2560 if (next == NULL)
2561 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002562
Thomas Wouters477c8d52006-05-27 19:21:47 +00002563 if (next == start) {
2564 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002565 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002566 result_s += to_len;
2567 start += 1;
2568 } else {
2569 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002570 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002571 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002572 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002573 result_s += to_len;
2574 start = next+1;
2575 }
2576 }
2577 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002578 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002579
Thomas Wouters477c8d52006-05-27 19:21:47 +00002580 return result;
2581}
2582
2583/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2584Py_LOCAL(PyStringObject *)
2585replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002586 const char *from_s, Py_ssize_t from_len,
2587 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002588 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002589 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002590 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002591 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002592 Py_ssize_t count, offset, product;
2593 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002594
Thomas Wouters477c8d52006-05-27 19:21:47 +00002595 self_s = PyString_AS_STRING(self);
2596 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002597
Thomas Wouters477c8d52006-05-27 19:21:47 +00002598 count = countstring(self_s, self_len,
2599 from_s, from_len,
2600 0, self_len, FORWARD, maxcount);
2601 if (count == 0) {
2602 /* no matches, return unchanged */
2603 return return_self(self);
2604 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002605
Thomas Wouters477c8d52006-05-27 19:21:47 +00002606 /* Check for overflow */
2607 /* result_len = self_len + count * (to_len-from_len) */
2608 product = count * (to_len-from_len);
2609 if (product / (to_len-from_len) != count) {
2610 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2611 return NULL;
2612 }
2613 result_len = self_len + product;
2614 if (result_len < 0) {
2615 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2616 return NULL;
2617 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002618
Thomas Wouters477c8d52006-05-27 19:21:47 +00002619 if ( (result = (PyStringObject *)
2620 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2621 return NULL;
2622 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002623
Thomas Wouters477c8d52006-05-27 19:21:47 +00002624 start = self_s;
2625 end = self_s + self_len;
2626 while (count-- > 0) {
2627 offset = findstring(start, end-start,
2628 from_s, from_len,
2629 0, end-start, FORWARD);
2630 if (offset == -1)
2631 break;
2632 next = start+offset;
2633 if (next == start) {
2634 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002635 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002636 result_s += to_len;
2637 start += from_len;
2638 } else {
2639 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002640 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002641 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002642 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002643 result_s += to_len;
2644 start = next+from_len;
2645 }
2646 }
2647 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002648 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002649
Thomas Wouters477c8d52006-05-27 19:21:47 +00002650 return result;
2651}
2652
2653
2654Py_LOCAL(PyStringObject *)
2655replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002656 const char *from_s, Py_ssize_t from_len,
2657 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002658 Py_ssize_t maxcount)
2659{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002660 if (maxcount < 0) {
2661 maxcount = PY_SSIZE_T_MAX;
2662 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2663 /* nothing to do; return the original string */
2664 return return_self(self);
2665 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002666
Thomas Wouters477c8d52006-05-27 19:21:47 +00002667 if (maxcount == 0 ||
2668 (from_len == 0 && to_len == 0)) {
2669 /* nothing to do; return the original string */
2670 return return_self(self);
2671 }
2672
2673 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002674
Thomas Wouters477c8d52006-05-27 19:21:47 +00002675 if (from_len == 0) {
2676 /* insert the 'to' string everywhere. */
2677 /* >>> "Python".replace("", ".") */
2678 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002679 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002680 }
2681
2682 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2683 /* point for an empty self string to generate a non-empty string */
2684 /* Special case so the remaining code always gets a non-empty string */
2685 if (PyString_GET_SIZE(self) == 0) {
2686 return return_self(self);
2687 }
2688
2689 if (to_len == 0) {
2690 /* delete all occurances of 'from' string */
2691 if (from_len == 1) {
2692 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002693 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002694 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002695 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002696 }
2697 }
2698
2699 /* Handle special case where both strings have the same length */
2700
2701 if (from_len == to_len) {
2702 if (from_len == 1) {
2703 return replace_single_character_in_place(
2704 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002705 from_s[0],
2706 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002707 maxcount);
2708 } else {
2709 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002710 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002711 }
2712 }
2713
2714 /* Otherwise use the more generic algorithms */
2715 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002716 return replace_single_character(self, from_s[0],
2717 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002718 } else {
2719 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002720 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002721 }
2722}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002723
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002724PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002725"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002726\n\
2727Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002728old replaced by new. If the optional argument count is\n\
2729given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002730
2731static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002732string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002733{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002734 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002735 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002736 const char *from_s, *to_s;
2737 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002738
Thomas Wouters477c8d52006-05-27 19:21:47 +00002739 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002740 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002741
Thomas Wouters477c8d52006-05-27 19:21:47 +00002742 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002743 from_s = PyString_AS_STRING(from);
2744 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002745 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002746 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002747 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002748 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002749 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002750 return NULL;
2751
Thomas Wouters477c8d52006-05-27 19:21:47 +00002752 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002753 to_s = PyString_AS_STRING(to);
2754 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002755 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002756 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002757 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002758 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002759 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002760 return NULL;
2761
Thomas Wouters477c8d52006-05-27 19:21:47 +00002762 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002763 from_s, from_len,
2764 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002765}
2766
Thomas Wouters477c8d52006-05-27 19:21:47 +00002767/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002768
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002769/* Matches the end (direction >= 0) or start (direction < 0) of self
2770 * against substr, using the start and end arguments. Returns
2771 * -1 on error, 0 if not found and 1 if found.
2772 */
2773Py_LOCAL(int)
2774_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2775 Py_ssize_t end, int direction)
2776{
2777 Py_ssize_t len = PyString_GET_SIZE(self);
2778 Py_ssize_t slen;
2779 const char* sub;
2780 const char* str;
2781
2782 if (PyString_Check(substr)) {
2783 sub = PyString_AS_STRING(substr);
2784 slen = PyString_GET_SIZE(substr);
2785 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002786 else if (PyUnicode_Check(substr))
2787 return PyUnicode_Tailmatch((PyObject *)self,
2788 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002789 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2790 return -1;
2791 str = PyString_AS_STRING(self);
2792
2793 string_adjust_indices(&start, &end, len);
2794
2795 if (direction < 0) {
2796 /* startswith */
2797 if (start+slen > len)
2798 return 0;
2799 } else {
2800 /* endswith */
2801 if (end-start < slen || start > len)
2802 return 0;
2803
2804 if (end-slen > start)
2805 start = end - slen;
2806 }
2807 if (end-start >= slen)
2808 return ! memcmp(str+start, sub, slen);
2809 return 0;
2810}
2811
2812
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002813PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002814"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002815\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002816Return True if S starts with the specified prefix, False otherwise.\n\
2817With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002818With optional end, stop comparing S at that position.\n\
2819prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002820
2821static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002822string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002823{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002824 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002825 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002826 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002827 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002828
Guido van Rossumc6821402000-05-08 14:08:05 +00002829 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2830 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002831 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002832 if (PyTuple_Check(subobj)) {
2833 Py_ssize_t i;
2834 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2835 result = _string_tailmatch(self,
2836 PyTuple_GET_ITEM(subobj, i),
2837 start, end, -1);
2838 if (result == -1)
2839 return NULL;
2840 else if (result) {
2841 Py_RETURN_TRUE;
2842 }
2843 }
2844 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002846 result = _string_tailmatch(self, subobj, start, end, -1);
2847 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002848 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002849 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002850 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002851}
2852
2853
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002854PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002855"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002856\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002857Return True if S ends with the specified suffix, False otherwise.\n\
2858With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002859With optional end, stop comparing S at that position.\n\
2860suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002861
2862static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002863string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002864{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002865 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002866 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002868 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002869
Guido van Rossumc6821402000-05-08 14:08:05 +00002870 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2871 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002872 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002873 if (PyTuple_Check(subobj)) {
2874 Py_ssize_t i;
2875 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2876 result = _string_tailmatch(self,
2877 PyTuple_GET_ITEM(subobj, i),
2878 start, end, +1);
2879 if (result == -1)
2880 return NULL;
2881 else if (result) {
2882 Py_RETURN_TRUE;
2883 }
2884 }
2885 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002886 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002887 result = _string_tailmatch(self, subobj, start, end, +1);
2888 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002889 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002890 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002891 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002892}
2893
2894
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002895PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002896"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002897\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002898Encodes S using the codec registered for encoding. encoding defaults\n\
2899to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002900handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002901a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2902'xmlcharrefreplace' as well as any other name registered with\n\
2903codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002904
2905static PyObject *
2906string_encode(PyStringObject *self, PyObject *args)
2907{
2908 char *encoding = NULL;
2909 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002910 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002911
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002912 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2913 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002914 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002915 if (v == NULL)
2916 goto onError;
Guido van Rossumf15a29f2007-05-04 00:41:39 +00002917 if (!PyBytes_Check(v)) {
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002918 PyErr_Format(PyExc_TypeError,
Guido van Rossumf15a29f2007-05-04 00:41:39 +00002919 "[str8] encoder did not return a bytes object "
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002920 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002921 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002922 Py_DECREF(v);
2923 return NULL;
2924 }
2925 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002926
2927 onError:
2928 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002929}
2930
2931
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002932PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002933"S.decode([encoding[,errors]]) -> object\n\
2934\n\
2935Decodes S using the codec registered for encoding. encoding defaults\n\
2936to the default encoding. errors may be given to set a different error\n\
2937handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002938a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2939as well as any other name registerd with codecs.register_error that is\n\
2940able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002941
2942static PyObject *
2943string_decode(PyStringObject *self, PyObject *args)
2944{
2945 char *encoding = NULL;
2946 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002947 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002948
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002949 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2950 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002951 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002952 if (v == NULL)
2953 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002954 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2955 PyErr_Format(PyExc_TypeError,
2956 "decoder did not return a string/unicode object "
2957 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002958 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002959 Py_DECREF(v);
2960 return NULL;
2961 }
2962 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002963
2964 onError:
2965 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002966}
2967
2968
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002969static PyObject *
2970string_getnewargs(PyStringObject *v)
2971{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002972 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002973}
2974
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002975
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002976static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002977string_methods[] = {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002978 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2979 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00002980 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002981 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2982 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2983 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2984 _Py_islower__doc__},
2985 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2986 _Py_isupper__doc__},
2987 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2988 _Py_isspace__doc__},
2989 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2990 _Py_isdigit__doc__},
2991 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2992 _Py_istitle__doc__},
2993 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2994 _Py_isalpha__doc__},
2995 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2996 _Py_isalnum__doc__},
2997 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2998 _Py_capitalize__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002999 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3000 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3001 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003002 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003003 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3004 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3005 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3006 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3007 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3008 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3009 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003010 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3011 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003012 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3013 startswith__doc__},
3014 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003015 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3016 _Py_swapcase__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003017 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3018 translate__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003019 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3020 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3021 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3022 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3023 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003024 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3025 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003026 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003027 expandtabs__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003028 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003029 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003030 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003031 {NULL, NULL} /* sentinel */
3032};
3033
Jeremy Hylton938ace62002-07-17 16:30:39 +00003034static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003035str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3036
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003037static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003038string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003039{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003040 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003041 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003042
Guido van Rossumae960af2001-08-30 03:11:59 +00003043 if (type != &PyString_Type)
3044 return str_subtype_new(type, args, kwds);
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003045 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
Tim Peters6d6c1a32001-08-02 04:15:00 +00003046 return NULL;
3047 if (x == NULL)
3048 return PyString_FromString("");
3049 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003050}
3051
Guido van Rossumae960af2001-08-30 03:11:59 +00003052static PyObject *
3053str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3054{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003055 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003056 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003057
3058 assert(PyType_IsSubtype(type, &PyString_Type));
3059 tmp = string_new(&PyString_Type, args, kwds);
3060 if (tmp == NULL)
3061 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003062 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003063 n = PyString_GET_SIZE(tmp);
3064 pnew = type->tp_alloc(type, n);
3065 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003066 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003067 ((PyStringObject *)pnew)->ob_shash =
3068 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003069 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003070 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003071 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003072 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003073}
3074
Guido van Rossumcacfc072002-05-24 19:01:59 +00003075static PyObject *
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003076string_mod(PyObject *v, PyObject *w)
3077{
3078 if (!PyString_Check(v)) {
3079 Py_INCREF(Py_NotImplemented);
3080 return Py_NotImplemented;
3081 }
3082 return PyString_Format(v, w);
3083}
3084
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003085static PyNumberMethods string_as_number = {
3086 0, /*nb_add*/
3087 0, /*nb_subtract*/
3088 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003089 string_mod, /*nb_remainder*/
3090};
3091
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003092PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003093"str(object) -> string\n\
3094\n\
3095Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003096If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003097
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003098static PyObject *str_iter(PyObject *seq);
3099
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003100PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003101 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Walter Dörwald5d7a7002007-05-03 20:49:27 +00003102 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003103 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003104 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003105 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003106 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003107 0, /* tp_getattr */
3108 0, /* tp_setattr */
3109 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003110 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003111 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003112 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003113 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003114 (hashfunc)string_hash, /* tp_hash */
3115 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003116 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003117 PyObject_GenericGetAttr, /* tp_getattro */
3118 0, /* tp_setattro */
3119 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003120 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3121 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003122 string_doc, /* tp_doc */
3123 0, /* tp_traverse */
3124 0, /* tp_clear */
3125 (richcmpfunc)string_richcompare, /* tp_richcompare */
3126 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003127 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003128 0, /* tp_iternext */
3129 string_methods, /* tp_methods */
3130 0, /* tp_members */
3131 0, /* tp_getset */
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003132 &PyBaseObject_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003133 0, /* tp_dict */
3134 0, /* tp_descr_get */
3135 0, /* tp_descr_set */
3136 0, /* tp_dictoffset */
3137 0, /* tp_init */
3138 0, /* tp_alloc */
3139 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003140 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003141};
3142
3143void
Fred Drakeba096332000-07-09 07:04:36 +00003144PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003145{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003146 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003147 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003148 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003149 if (w == NULL || !PyString_Check(*pv)) {
3150 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003151 *pv = NULL;
3152 return;
3153 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003154 v = string_concat((PyStringObject *) *pv, w);
3155 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003156 *pv = v;
3157}
3158
Guido van Rossum013142a1994-08-30 08:19:36 +00003159void
Fred Drakeba096332000-07-09 07:04:36 +00003160PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003161{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003162 PyString_Concat(pv, w);
3163 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003164}
3165
3166
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003167/* The following function breaks the notion that strings are immutable:
3168 it changes the size of a string. We get away with this only if there
3169 is only one module referencing the object. You can also think of it
3170 as creating a new string object and destroying the old one, only
3171 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003172 already be known to some other part of the code...
3173 Note that if there's not enough memory to resize the string, the original
3174 string object at *pv is deallocated, *pv is set to NULL, an "out of
3175 memory" exception is set, and -1 is returned. Else (on success) 0 is
3176 returned, and the value in *pv may or may not be the same as on input.
3177 As always, an extra byte is allocated for a trailing \0 byte (newsize
3178 does *not* include that), and a trailing \0 byte is stored.
3179*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003180
3181int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003182_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003183{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003184 register PyObject *v;
3185 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003186 v = *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003187 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00003188 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003189 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003190 Py_DECREF(v);
3191 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003192 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003193 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003194 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003195 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003196 _Py_ForgetReference(v);
3197 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003198 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003199 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003200 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003201 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003202 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003203 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003204 _Py_NewReference(*pv);
3205 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003206 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003207 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003208 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003209 return 0;
3210}
Guido van Rossume5372401993-03-16 12:15:04 +00003211
3212/* Helpers for formatstring */
3213
Thomas Wouters477c8d52006-05-27 19:21:47 +00003214Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00003215getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003216{
Thomas Wouters977485d2006-02-16 15:59:12 +00003217 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003218 if (argidx < arglen) {
3219 (*p_argidx)++;
3220 if (arglen < 0)
3221 return args;
3222 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003223 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003224 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003225 PyErr_SetString(PyExc_TypeError,
3226 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003227 return NULL;
3228}
3229
/* Format flag bits, one per flag character of a conversion
 * specification.  They are combined with bitwise OR.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
3242
Thomas Wouters477c8d52006-05-27 19:21:47 +00003243Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00003244formatfloat(char *buf, size_t buflen, int flags,
3245 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003246{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003247 /* fmt = '%#.' + `prec` + `type`
3248 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003249 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003250 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003251 x = PyFloat_AsDouble(v);
3252 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00003253 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003254 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003255 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003256 }
Guido van Rossume5372401993-03-16 12:15:04 +00003257 if (prec < 0)
3258 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003259 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3260 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003261 /* Worst case length calc to ensure no buffer overrun:
3262
3263 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003264 fmt = %#.<prec>g
3265 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003266 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003267 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003268
3269 'f' formats:
3270 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3271 len = 1 + 50 + 1 + prec = 52 + prec
3272
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003273 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003274 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003275
3276 */
Guido van Rossumb5a755e2007-07-18 18:15:48 +00003277 if (((type == 'g' || type == 'G') &&
3278 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003279 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003280 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003281 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003282 return -1;
3283 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003284 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3285 (flags&F_ALT) ? "#" : "",
3286 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003287 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003288 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003289}
3290
Tim Peters38fd5b62000-09-21 05:43:11 +00003291/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3292 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3293 * Python's regular ints.
3294 * Return value: a new PyString*, or NULL if error.
3295 * . *pbuf is set to point into it,
3296 * *plen set to the # of chars following that.
3297 * Caller must decref it when done using pbuf.
3298 * The string starting at *pbuf is of the form
3299 * "-"? ("0x" | "0X")? digit+
3300 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003301 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003302 * There will be at least prec digits, zero-filled on the left if
3303 * necessary to get that many.
3304 * val object to be converted
3305 * flags bitmask of format flags; only F_ALT is looked at
3306 * prec minimum number of digits; 0-fill on left if needed
3307 * type a character in [duoxX]; u acts the same as d
3308 *
3309 * CAUTION: o, x and X conversions on regular ints can never
3310 * produce a '-' sign, but can for Python's unbounded ints.
3311 */
3312PyObject*
3313_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3314 char **pbuf, int *plen)
3315{
3316 PyObject *result = NULL;
3317 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003318 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003319 int sign; /* 1 if '-', else 0 */
3320 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003321 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003322 int numdigits; /* len == numnondigits + numdigits */
3323 int numnondigits = 0;
3324
Guido van Rossumddefaf32007-01-14 03:31:43 +00003325 /* Avoid exceeding SSIZE_T_MAX */
3326 if (prec > PY_SSIZE_T_MAX-3) {
3327 PyErr_SetString(PyExc_OverflowError,
3328 "precision too large");
3329 return NULL;
3330 }
3331
Tim Peters38fd5b62000-09-21 05:43:11 +00003332 switch (type) {
3333 case 'd':
3334 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003335 /* Special-case boolean: we want 0/1 */
3336 if (PyBool_Check(val))
3337 result = PyNumber_ToBase(val, 10);
3338 else
3339 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003340 break;
3341 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003342 numnondigits = 2;
3343 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003344 break;
3345 case 'x':
3346 case 'X':
3347 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003348 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003349 break;
3350 default:
3351 assert(!"'type' not in [duoxX]");
3352 }
3353 if (!result)
3354 return NULL;
3355
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003356 buf = PyString_AsString(result);
3357 if (!buf) {
3358 Py_DECREF(result);
3359 return NULL;
3360 }
3361
Tim Peters38fd5b62000-09-21 05:43:11 +00003362 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003363 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003364 PyErr_BadInternalCall();
3365 return NULL;
3366 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00003367 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003368 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00003369 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
3370 return NULL;
3371 }
3372 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003373 if (buf[len-1] == 'L') {
3374 --len;
3375 buf[len] = '\0';
3376 }
3377 sign = buf[0] == '-';
3378 numnondigits += sign;
3379 numdigits = len - numnondigits;
3380 assert(numdigits > 0);
3381
Tim Petersfff53252001-04-12 18:38:48 +00003382 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003383 if (((flags & F_ALT) == 0 &&
3384 (type == 'o' || type == 'x' || type == 'X'))) {
3385 assert(buf[sign] == '0');
3386 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3387 buf[sign+1] == 'o');
3388 numnondigits -= 2;
3389 buf += 2;
3390 len -= 2;
3391 if (sign)
3392 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003393 assert(len == numnondigits + numdigits);
3394 assert(numdigits > 0);
3395 }
3396
3397 /* Fill with leading zeroes to meet minimum width. */
3398 if (prec > numdigits) {
3399 PyObject *r1 = PyString_FromStringAndSize(NULL,
3400 numnondigits + prec);
3401 char *b1;
3402 if (!r1) {
3403 Py_DECREF(result);
3404 return NULL;
3405 }
3406 b1 = PyString_AS_STRING(r1);
3407 for (i = 0; i < numnondigits; ++i)
3408 *b1++ = *buf++;
3409 for (i = 0; i < prec - numdigits; i++)
3410 *b1++ = '0';
3411 for (i = 0; i < numdigits; i++)
3412 *b1++ = *buf++;
3413 *b1 = '\0';
3414 Py_DECREF(result);
3415 result = r1;
3416 buf = PyString_AS_STRING(result);
3417 len = numnondigits + prec;
3418 }
3419
3420 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003421 if (type == 'X') {
3422 /* Need to convert all lower case letters to upper case.
3423 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003424 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003425 if (buf[i] >= 'a' && buf[i] <= 'x')
3426 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003427 }
3428 *pbuf = buf;
3429 *plen = len;
3430 return result;
3431}
3432
Thomas Wouters477c8d52006-05-27 19:21:47 +00003433Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00003434formatint(char *buf, size_t buflen, int flags,
3435 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003436{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003437 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003438 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3439 + 1 + 1 = 24 */
3440 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003441 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003442 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003443
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003444 x = PyInt_AsLong(v);
3445 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00003446 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003447 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003448 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003449 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003450 if (x < 0 && type == 'u') {
3451 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003452 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003453 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3454 sign = "-";
3455 else
3456 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003457 if (prec < 0)
3458 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003459
3460 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003461 (type == 'x' || type == 'X' || type == 'o')) {
3462 /* When converting under %#o, %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003463 * of issues that cause pain:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003464 * - for %#o, we want a different base marker than C
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003465 * - when 0 is being converted, the C standard leaves off
3466 * the '0x' or '0X', which is inconsistent with other
3467 * %#x/%#X conversions and inconsistent with Python's
3468 * hex() function
3469 * - there are platforms that violate the standard and
3470 * convert 0 with the '0x' or '0X'
3471 * (Metrowerks, Compaq Tru64)
3472 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003473 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003474 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003475 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003476 * We can achieve the desired consistency by inserting our
3477 * own '0x' or '0X' prefix, and substituting %x/%X in place
3478 * of %#x/%#X.
3479 *
3480 * Note that this is the same approach as used in
3481 * formatint() in unicodeobject.c
3482 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003483 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3484 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003485 }
3486 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003487 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3488 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003489 prec, type);
3490 }
3491
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003492 /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003493 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003494 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003495 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003496 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003497 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003498 return -1;
3499 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003500 if (sign[0])
3501 PyOS_snprintf(buf, buflen, fmt, -x);
3502 else
3503 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003504 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003505}
3506
Thomas Wouters477c8d52006-05-27 19:21:47 +00003507Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00003508formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003509{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003510 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003511 if (PyString_Check(v)) {
3512 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003513 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003514 }
3515 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003516 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003517 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003518 }
3519 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003520 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003521}
3522
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is parenthesized so that the cast cannot bind to
   neighbouring tokens at the point of use.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003532
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003533PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003534PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003535{
3536 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003537 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003538 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00003539 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003540 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003541 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003542 PyObject *dict = NULL;
3543 if (format == NULL || !PyString_Check(format) || args == NULL) {
3544 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003545 return NULL;
3546 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003547 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003548 fmt = PyString_AS_STRING(format);
3549 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003550 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003551 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003552 if (result == NULL)
3553 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003554 res = PyString_AsString(result);
3555 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003556 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003557 argidx = 0;
3558 }
3559 else {
3560 arglen = -1;
3561 argidx = -2;
3562 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003563 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003564 !PyString_Check(args) && !PyUnicode_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003565 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003566 while (--fmtcnt >= 0) {
3567 if (*fmt != '%') {
3568 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003569 rescnt = fmtcnt + 100;
3570 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003571 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003572 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003573 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003574 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003575 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003576 }
3577 *res++ = *fmt++;
3578 }
3579 else {
3580 /* Got a format specifier */
3581 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003582 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003583 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003584 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003585 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003586 PyObject *v = NULL;
3587 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003588 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003589 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003590 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003591 char formatbuf[FORMATBUFLEN];
3592 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00003593 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003594 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003595
Guido van Rossumda9c2711996-12-05 21:58:58 +00003596 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003597 if (*fmt == '(') {
3598 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003599 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003600 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003601 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003602
3603 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003604 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003605 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003606 goto error;
3607 }
3608 ++fmt;
3609 --fmtcnt;
3610 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003611 /* Skip over balanced parentheses */
3612 while (pcount > 0 && --fmtcnt >= 0) {
3613 if (*fmt == ')')
3614 --pcount;
3615 else if (*fmt == '(')
3616 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003617 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003618 }
3619 keylen = fmt - keystart - 1;
3620 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003621 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003622 "incomplete format key");
3623 goto error;
3624 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003625 key = PyString_FromStringAndSize(keystart,
3626 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003627 if (key == NULL)
3628 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003629 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003630 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003631 args_owned = 0;
3632 }
3633 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003634 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003635 if (args == NULL) {
3636 goto error;
3637 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003638 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003639 arglen = -1;
3640 argidx = -2;
3641 }
Guido van Rossume5372401993-03-16 12:15:04 +00003642 while (--fmtcnt >= 0) {
3643 switch (c = *fmt++) {
3644 case '-': flags |= F_LJUST; continue;
3645 case '+': flags |= F_SIGN; continue;
3646 case ' ': flags |= F_BLANK; continue;
3647 case '#': flags |= F_ALT; continue;
3648 case '0': flags |= F_ZERO; continue;
3649 }
3650 break;
3651 }
3652 if (c == '*') {
3653 v = getnextarg(args, arglen, &argidx);
3654 if (v == NULL)
3655 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003656 if (!PyInt_Check(v)) {
3657 PyErr_SetString(PyExc_TypeError,
3658 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003659 goto error;
3660 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003661 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00003662 if (width == -1 && PyErr_Occurred())
3663 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003664 if (width < 0) {
3665 flags |= F_LJUST;
3666 width = -width;
3667 }
Guido van Rossume5372401993-03-16 12:15:04 +00003668 if (--fmtcnt >= 0)
3669 c = *fmt++;
3670 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003671 else if (c >= 0 && ISDIGIT(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003672 width = c - '0';
3673 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003674 c = Py_CHARMASK(*fmt++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003675 if (!ISDIGIT(c))
Guido van Rossume5372401993-03-16 12:15:04 +00003676 break;
3677 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003678 PyErr_SetString(
3679 PyExc_ValueError,
3680 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003681 goto error;
3682 }
3683 width = width*10 + (c - '0');
3684 }
3685 }
3686 if (c == '.') {
3687 prec = 0;
3688 if (--fmtcnt >= 0)
3689 c = *fmt++;
3690 if (c == '*') {
3691 v = getnextarg(args, arglen, &argidx);
3692 if (v == NULL)
3693 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003694 if (!PyInt_Check(v)) {
3695 PyErr_SetString(
3696 PyExc_TypeError,
3697 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003698 goto error;
3699 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003700 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00003701 if (prec == -1 && PyErr_Occurred())
3702 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003703 if (prec < 0)
3704 prec = 0;
3705 if (--fmtcnt >= 0)
3706 c = *fmt++;
3707 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003708 else if (c >= 0 && ISDIGIT(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003709 prec = c - '0';
3710 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003711 c = Py_CHARMASK(*fmt++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003712 if (!ISDIGIT(c))
Guido van Rossume5372401993-03-16 12:15:04 +00003713 break;
3714 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003715 PyErr_SetString(
3716 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003717 "prec too big");
3718 goto error;
3719 }
3720 prec = prec*10 + (c - '0');
3721 }
3722 }
3723 } /* prec */
3724 if (fmtcnt >= 0) {
3725 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003726 if (--fmtcnt >= 0)
3727 c = *fmt++;
3728 }
3729 }
3730 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003731 PyErr_SetString(PyExc_ValueError,
3732 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003733 goto error;
3734 }
3735 if (c != '%') {
3736 v = getnextarg(args, arglen, &argidx);
3737 if (v == NULL)
3738 goto error;
3739 }
3740 sign = 0;
3741 fill = ' ';
3742 switch (c) {
3743 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003744 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003745 len = 1;
3746 break;
3747 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00003748 if (PyUnicode_Check(v)) {
3749 fmt = fmt_start;
3750 argidx = argidx_start;
3751 goto unicode;
3752 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00003753 temp = _PyObject_Str(v);
3754 if (temp != NULL && PyUnicode_Check(temp)) {
3755 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00003756 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003757 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003758 goto unicode;
3759 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00003760 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00003761 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00003762 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00003763 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003764 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003765 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003766 if (!PyString_Check(temp)) {
3767 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003768 "%s argument has non-string str()/repr()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003769 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003770 goto error;
3771 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003772 pbuf = PyString_AS_STRING(temp);
3773 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003774 if (prec >= 0 && len > prec)
3775 len = prec;
3776 break;
3777 case 'i':
3778 case 'd':
3779 case 'u':
3780 case 'o':
3781 case 'x':
3782 case 'X':
3783 if (c == 'i')
3784 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003785 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00003786 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003787 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00003788 prec, c, &pbuf, &ilen);
3789 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003790 if (!temp)
3791 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00003792 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003793 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003794 else {
3795 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003796 len = formatint(pbuf,
3797 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003798 flags, prec, c, v);
3799 if (len < 0)
3800 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003801 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003802 }
3803 if (flags & F_ZERO)
3804 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003805 break;
3806 case 'e':
3807 case 'E':
3808 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00003809 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00003810 case 'g':
3811 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00003812 if (c == 'F')
3813 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003814 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003815 len = formatfloat(pbuf, sizeof(formatbuf),
3816 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003817 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003818 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003819 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003820 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003821 fill = '0';
3822 break;
3823 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00003824 if (PyUnicode_Check(v)) {
3825 fmt = fmt_start;
3826 argidx = argidx_start;
3827 goto unicode;
3828 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003829 pbuf = formatbuf;
3830 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003831 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003832 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003833 break;
3834 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003835 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003836 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00003837 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00003838 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00003839 (Py_ssize_t)(fmt - 1 -
3840 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003841 goto error;
3842 }
3843 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003844 if (*pbuf == '-' || *pbuf == '+') {
3845 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003846 len--;
3847 }
3848 else if (flags & F_SIGN)
3849 sign = '+';
3850 else if (flags & F_BLANK)
3851 sign = ' ';
3852 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003853 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003854 }
3855 if (width < len)
3856 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003857 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003858 reslen -= rescnt;
3859 rescnt = width + fmtcnt + 100;
3860 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003861 if (reslen < 0) {
3862 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00003863 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003864 return PyErr_NoMemory();
3865 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00003866 if (_PyString_Resize(&result, reslen) < 0) {
3867 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003868 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00003869 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003870 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003871 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003872 }
3873 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003874 if (fill != ' ')
3875 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003876 rescnt--;
3877 if (width > len)
3878 width--;
3879 }
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003880 if ((flags & F_ALT) &&
3881 (c == 'x' || c == 'X' || c == 'o')) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003882 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003883 assert(pbuf[1] == c);
3884 if (fill != ' ') {
3885 *res++ = *pbuf++;
3886 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003887 }
Tim Petersfff53252001-04-12 18:38:48 +00003888 rescnt -= 2;
3889 width -= 2;
3890 if (width < 0)
3891 width = 0;
3892 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003893 }
3894 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003895 do {
3896 --rescnt;
3897 *res++ = fill;
3898 } while (--width > len);
3899 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003900 if (fill == ' ') {
3901 if (sign)
3902 *res++ = sign;
3903 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003904 (c == 'x' || c == 'X' || c == 'o')) {
Tim Petersfff53252001-04-12 18:38:48 +00003905 assert(pbuf[0] == '0');
3906 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003907 *res++ = *pbuf++;
3908 *res++ = *pbuf++;
3909 }
3910 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003911 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003912 res += len;
3913 rescnt -= len;
3914 while (--width >= len) {
3915 --rescnt;
3916 *res++ = ' ';
3917 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003918 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003919 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003920 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00003921 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00003922 goto error;
3923 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003924 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003925 } /* '%' */
3926 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003927 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003928 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003929 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003930 goto error;
3931 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003932 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003933 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003934 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003935 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003936 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003937
3938 unicode:
3939 if (args_owned) {
3940 Py_DECREF(args);
3941 args_owned = 0;
3942 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003943 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003944 if (PyTuple_Check(orig_args) && argidx > 0) {
3945 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003946 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00003947 v = PyTuple_New(n);
3948 if (v == NULL)
3949 goto error;
3950 while (--n >= 0) {
3951 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3952 Py_INCREF(w);
3953 PyTuple_SET_ITEM(v, n, w);
3954 }
3955 args = v;
3956 } else {
3957 Py_INCREF(orig_args);
3958 args = orig_args;
3959 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003960 args_owned = 1;
3961 /* Take what we have of the result and let the Unicode formatting
3962 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003963 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003964 if (_PyString_Resize(&result, rescnt))
3965 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003966 fmtcnt = PyString_GET_SIZE(format) - \
3967 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003968 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3969 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003970 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003971 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003972 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003973 if (v == NULL)
3974 goto error;
3975 /* Paste what we have (result) to what the Unicode formatting
3976 function returned (v) and return the result (or error) */
3977 w = PyUnicode_Concat(result, v);
3978 Py_DECREF(result);
3979 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003980 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003981 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003982
Guido van Rossume5372401993-03-16 12:15:04 +00003983 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003984 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003985 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003986 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003987 }
Guido van Rossume5372401993-03-16 12:15:04 +00003988 return NULL;
3989}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003990
Guido van Rossum2a61e741997-01-18 07:55:05 +00003991void
Fred Drakeba096332000-07-09 07:04:36 +00003992PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003993{
3994 register PyStringObject *s = (PyStringObject *)(*p);
3995 PyObject *t;
3996 if (s == NULL || !PyString_Check(s))
3997 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00003998 /* If it's a string subclass, we don't really know what putting
3999 it in the interned dict might do. */
4000 if (!PyString_CheckExact(s))
4001 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004002 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004003 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004004 if (interned == NULL) {
4005 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004006 if (interned == NULL) {
4007 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004008 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004009 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004010 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004011 t = PyDict_GetItem(interned, (PyObject *)s);
4012 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004013 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004014 Py_DECREF(*p);
4015 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004016 return;
4017 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004018
Armin Rigo79f7ad22004-08-07 19:27:39 +00004019 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004020 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004021 return;
4022 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004023 /* The two references in interned are not counted by refcnt.
4024 The string deallocator will take care of this */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004025 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004026 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004027}
4028
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004029void
4030PyString_InternImmortal(PyObject **p)
4031{
4032 PyString_InternInPlace(p);
4033 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4034 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4035 Py_INCREF(*p);
4036 }
4037}
4038
Guido van Rossum2a61e741997-01-18 07:55:05 +00004039
4040PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004041PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004042{
4043 PyObject *s = PyString_FromString(cp);
4044 if (s == NULL)
4045 return NULL;
4046 PyString_InternInPlace(&s);
4047 return s;
4048}
4049
Guido van Rossum8cf04761997-08-02 02:57:45 +00004050void
Fred Drakeba096332000-07-09 07:04:36 +00004051PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004052{
4053 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004054 for (i = 0; i < UCHAR_MAX + 1; i++) {
4055 Py_XDECREF(characters[i]);
4056 characters[i] = NULL;
4057 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004058 Py_XDECREF(nullstring);
4059 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004060}
Barry Warsawa903ad982001-02-23 16:40:48 +00004061
Barry Warsawa903ad982001-02-23 16:40:48 +00004062void _Py_ReleaseInternedStrings(void)
4063{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004064 PyObject *keys;
4065 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004066 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004067 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004068
4069 if (interned == NULL || !PyDict_Check(interned))
4070 return;
4071 keys = PyDict_Keys(interned);
4072 if (keys == NULL || !PyList_Check(keys)) {
4073 PyErr_Clear();
4074 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004075 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004076
4077 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4078 detector, interned strings are not forcibly deallocated; rather, we
4079 give them their stolen references back, and then clear and DECREF
4080 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004081
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004082 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004083 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4084 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004085 for (i = 0; i < n; i++) {
4086 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4087 switch (s->ob_sstate) {
4088 case SSTATE_NOT_INTERNED:
4089 /* XXX Shouldn't happen */
4090 break;
4091 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004092 Py_Refcnt(s) += 1;
4093 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004094 break;
4095 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004096 Py_Refcnt(s) += 2;
4097 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004098 break;
4099 default:
4100 Py_FatalError("Inconsistent interned string state.");
4101 }
4102 s->ob_sstate = SSTATE_NOT_INTERNED;
4103 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004104 fprintf(stderr, "total size of all interned strings: "
4105 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4106 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004107 Py_DECREF(keys);
4108 PyDict_Clear(interned);
4109 Py_DECREF(interned);
4110 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004111}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004112
4113
4114/*********************** Str Iterator ****************************/
4115
4116typedef struct {
4117 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00004118 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004119 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4120} striterobject;
4121
4122static void
4123striter_dealloc(striterobject *it)
4124{
4125 _PyObject_GC_UNTRACK(it);
4126 Py_XDECREF(it->it_seq);
4127 PyObject_GC_Del(it);
4128}
4129
4130static int
4131striter_traverse(striterobject *it, visitproc visit, void *arg)
4132{
4133 Py_VISIT(it->it_seq);
4134 return 0;
4135}
4136
4137static PyObject *
4138striter_next(striterobject *it)
4139{
4140 PyStringObject *seq;
4141 PyObject *item;
4142
4143 assert(it != NULL);
4144 seq = it->it_seq;
4145 if (seq == NULL)
4146 return NULL;
4147 assert(PyString_Check(seq));
4148
4149 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004150 item = PyString_FromStringAndSize(
4151 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004152 if (item != NULL)
4153 ++it->it_index;
4154 return item;
4155 }
4156
4157 Py_DECREF(seq);
4158 it->it_seq = NULL;
4159 return NULL;
4160}
4161
4162static PyObject *
4163striter_len(striterobject *it)
4164{
4165 Py_ssize_t len = 0;
4166 if (it->it_seq)
4167 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
4168 return PyInt_FromSsize_t(len);
4169}
4170
Guido van Rossum49d6b072006-08-17 21:11:47 +00004171PyDoc_STRVAR(length_hint_doc,
4172 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004173
4174static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00004175 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
4176 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004177 {NULL, NULL} /* sentinel */
4178};
4179
4180PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004181 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum49d6b072006-08-17 21:11:47 +00004182 "striterator", /* tp_name */
4183 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004184 0, /* tp_itemsize */
4185 /* methods */
4186 (destructor)striter_dealloc, /* tp_dealloc */
4187 0, /* tp_print */
4188 0, /* tp_getattr */
4189 0, /* tp_setattr */
4190 0, /* tp_compare */
4191 0, /* tp_repr */
4192 0, /* tp_as_number */
4193 0, /* tp_as_sequence */
4194 0, /* tp_as_mapping */
4195 0, /* tp_hash */
4196 0, /* tp_call */
4197 0, /* tp_str */
4198 PyObject_GenericGetAttr, /* tp_getattro */
4199 0, /* tp_setattro */
4200 0, /* tp_as_buffer */
4201 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4202 0, /* tp_doc */
4203 (traverseproc)striter_traverse, /* tp_traverse */
4204 0, /* tp_clear */
4205 0, /* tp_richcompare */
4206 0, /* tp_weaklistoffset */
4207 PyObject_SelfIter, /* tp_iter */
4208 (iternextfunc)striter_next, /* tp_iternext */
4209 striter_methods, /* tp_methods */
4210 0,
4211};
4212
4213static PyObject *
4214str_iter(PyObject *seq)
4215{
4216 striterobject *it;
4217
4218 if (!PyString_Check(seq)) {
4219 PyErr_BadInternalCall();
4220 return NULL;
4221 }
4222 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
4223 if (it == NULL)
4224 return NULL;
4225 it->it_index = 0;
4226 Py_INCREF(seq);
4227 it->it_seq = (PyStringObject *)seq;
4228 _PyObject_GC_TRACK(it);
4229 return (PyObject *)it;
4230}