blob: 4b2778a544f11eda51cb8af8eacc2644686aa03c [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +00008
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
   Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
   For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000176 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000245 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 n = (n*10) + *f++ - '0';
252 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000253 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000417 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000490 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000510 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000522 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000609 if (ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000613 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000614 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000615 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000622 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000623 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000624 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
630 }
631 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000632 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000633 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000634 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 }
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000645 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 default:
648 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000649 s--;
650 goto non_esc; /* an arbitry number of unescaped
651 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 }
653 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000654 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000655 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 return v;
657 failed:
658 Py_DECREF(v);
659 return NULL;
660}
661
Thomas Wouters477c8d52006-05-27 19:21:47 +0000662/* -------------------------------------------------------------------- */
663/* object api */
664
Martin v. Löwis18e16552006-02-15 17:27:45 +0000665static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000666string_getsize(register PyObject *op)
667{
668 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000669 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000670 if (PyString_AsStringAndSize(op, &s, &len))
671 return -1;
672 return len;
673}
674
675static /*const*/ char *
676string_getbuffer(register PyObject *op)
677{
678 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000679 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000680 if (PyString_AsStringAndSize(op, &s, &len))
681 return NULL;
682 return s;
683}
684
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000686PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000688 if (PyUnicode_Check(op)) {
689 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
690 if (!op)
691 return -1;
692 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000693 if (!PyString_Check(op))
694 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000695 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696}
697
698/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000699PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000701 if (PyUnicode_Check(op)) {
702 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
703 if (!op)
704 return NULL;
705 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000706 if (!PyString_Check(op))
707 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000708 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709}
710
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000711int
712PyString_AsStringAndSize(register PyObject *obj,
713 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000714 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000715{
716 if (s == NULL) {
717 PyErr_BadInternalCall();
718 return -1;
719 }
720
721 if (!PyString_Check(obj)) {
722 if (PyUnicode_Check(obj)) {
723 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
724 if (obj == NULL)
725 return -1;
726 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000727 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000728 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000730 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000731 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000732 return -1;
733 }
734 }
735
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000739 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
743 }
744 return 0;
745}
746
Thomas Wouters477c8d52006-05-27 19:21:47 +0000747/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748/* Methods */
749
Thomas Wouters477c8d52006-05-27 19:21:47 +0000750#define STRINGLIB_CHAR char
751
752#define STRINGLIB_CMP memcmp
753#define STRINGLIB_LEN PyString_GET_SIZE
754#define STRINGLIB_NEW PyString_FromStringAndSize
755#define STRINGLIB_STR PyString_AS_STRING
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000756#define STRINGLIB_WANT_CONTAINS_OBJ 1
Thomas Wouters477c8d52006-05-27 19:21:47 +0000757
758#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000759#define STRINGLIB_CHECK_EXACT PyString_CheckExact
760#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000761
762#include "stringlib/fastsearch.h"
763
764#include "stringlib/count.h"
765#include "stringlib/find.h"
766#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000767#include "stringlib/ctype.h"
768#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000769
770
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000771PyObject *
772PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000773{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000774 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000775 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis5b222132007-06-10 09:51:05 +0000776 Py_ssize_t length = PyString_GET_SIZE(op);
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000777 size_t newsize = 3 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000778 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000779 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000780 PyErr_SetString(PyExc_OverflowError,
781 "string is too large to make repr");
782 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000783 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000784 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000785 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000786 }
787 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000788 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000789 register Py_UNICODE c;
790 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000791 int quote;
792
Thomas Wouters7e474022000-07-16 12:04:32 +0000793 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000795 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000796 char *test, *start;
797 start = PyString_AS_STRING(op);
798 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000799 if (*test == '"') {
800 quote = '\''; /* switch back to single quote */
801 goto decided;
802 }
803 else if (*test == '\'')
804 quote = '"';
805 }
806 decided:
807 ;
808 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000809
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000810 *p++ = 's', *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000811 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000812 /* There's at least enough room for a hex escape
813 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000814 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000818 else if (c == '\t')
819 *p++ = '\\', *p++ = 't';
820 else if (c == '\n')
821 *p++ = '\\', *p++ = 'n';
822 else if (c == '\r')
823 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000824 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000825 *p++ = '\\';
826 *p++ = 'x';
827 *p++ = hexdigits[(c & 0xf0) >> 4];
828 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000829 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000830 else
831 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000833 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000834 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000836 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
837 Py_DECREF(v);
838 return NULL;
839 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000840 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000842}
843
Guido van Rossum189f1df2001-05-01 16:51:53 +0000844static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000845string_repr(PyObject *op)
846{
847 return PyString_Repr(op, 1);
848}
849
850static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000851string_str(PyObject *s)
852{
Tim Petersc9933152001-10-16 20:18:24 +0000853 assert(PyString_Check(s));
854 if (PyString_CheckExact(s)) {
855 Py_INCREF(s);
856 return s;
857 }
858 else {
859 /* Subtype -- return genuine string with the same value. */
860 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000861 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000862 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863}
864
Martin v. Löwis18e16552006-02-15 17:27:45 +0000865static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000866string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000867{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000868 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869}
870
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000871static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000872string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000874 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000875 register PyStringObject *op;
876 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000877 if (PyUnicode_Check(bb))
878 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000879 if (PyBytes_Check(bb))
880 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000881 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000882 "cannot concatenate 'str8' and '%.200s' objects",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000883 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884 return NULL;
885 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000886#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887 /* Optimize cases with empty left or right operand */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000888 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000889 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000890 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000891 Py_INCREF(bb);
892 return bb;
893 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 Py_INCREF(a);
895 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000897 size = Py_Size(a) + Py_Size(b);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000898 if (size < 0) {
899 PyErr_SetString(PyExc_OverflowError,
900 "strings are too large to concat");
901 return NULL;
902 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000903
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000904 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000905 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000906 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000908 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000909 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000910 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000911 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
912 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915#undef b
916}
917
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000918static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000919string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000921 register Py_ssize_t i;
922 register Py_ssize_t j;
923 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000924 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000925 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000926 if (n < 0)
927 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000928 /* watch out for overflows: the size can overflow int,
929 * and the # of bytes needed can overflow size_t
930 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000931 size = Py_Size(a) * n;
932 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000933 PyErr_SetString(PyExc_OverflowError,
934 "repeated string is too long");
935 return NULL;
936 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000937 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 Py_INCREF(a);
939 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 }
Tim Peterse7c05322004-06-27 17:24:49 +0000941 nbytes = (size_t)size;
942 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000943 PyErr_SetString(PyExc_OverflowError,
944 "repeated string is too long");
945 return NULL;
946 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000948 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000949 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000950 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000951 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000952 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000953 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000954 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000955 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000956 memset(op->ob_sval, a->ob_sval[0] , n);
957 return (PyObject *) op;
958 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000959 i = 0;
960 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000961 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
962 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000963 }
964 while (i < size) {
965 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000966 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000967 i += j;
968 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000970}
971
Guido van Rossum9284a572000-03-07 15:53:43 +0000972static int
Thomas Wouters477c8d52006-05-27 19:21:47 +0000973string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +0000974{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000975 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000976 if (PyUnicode_Check(sub_obj))
977 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000978 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000979 PyErr_Format(PyExc_TypeError,
980 "'in <string>' requires string as left operand, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000981 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000982 return -1;
983 }
Guido van Rossum9284a572000-03-07 15:53:43 +0000984 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000985
Thomas Wouters477c8d52006-05-27 19:21:47 +0000986 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +0000987}
988
Martin v. Löwiscd353062001-05-24 16:56:35 +0000989static PyObject*
990string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000991{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000992 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000993 Py_ssize_t len_a, len_b;
994 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000995 PyObject *result;
996
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000997 /* Make sure both arguments are strings. */
998 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000999 result = Py_NotImplemented;
1000 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001001 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001002 if (a == b) {
1003 switch (op) {
1004 case Py_EQ:case Py_LE:case Py_GE:
1005 result = Py_True;
1006 goto out;
1007 case Py_NE:case Py_LT:case Py_GT:
1008 result = Py_False;
1009 goto out;
1010 }
1011 }
1012 if (op == Py_EQ) {
1013 /* Supporting Py_NE here as well does not save
1014 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001015 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001016 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001017 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001018 result = Py_True;
1019 } else {
1020 result = Py_False;
1021 }
1022 goto out;
1023 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001024 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001025 min_len = (len_a < len_b) ? len_a : len_b;
1026 if (min_len > 0) {
1027 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1028 if (c==0)
1029 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001030 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001031 c = 0;
1032 if (c == 0)
1033 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1034 switch (op) {
1035 case Py_LT: c = c < 0; break;
1036 case Py_LE: c = c <= 0; break;
1037 case Py_EQ: assert(0); break; /* unreachable */
1038 case Py_NE: c = c != 0; break;
1039 case Py_GT: c = c > 0; break;
1040 case Py_GE: c = c >= 0; break;
1041 default:
1042 result = Py_NotImplemented;
1043 goto out;
1044 }
1045 result = c ? Py_True : Py_False;
1046 out:
1047 Py_INCREF(result);
1048 return result;
1049}
1050
1051int
1052_PyString_Eq(PyObject *o1, PyObject *o2)
1053{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001054 PyStringObject *a = (PyStringObject*) o1;
1055 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001056 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001057 && *a->ob_sval == *b->ob_sval
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001058 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059}
1060
Guido van Rossum9bfef441993-03-29 10:43:31 +00001061static long
Fred Drakeba096332000-07-09 07:04:36 +00001062string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001063{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001064 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001065 register unsigned char *p;
1066 register long x;
1067
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001068 if (a->ob_shash != -1)
1069 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001070 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001071 p = (unsigned char *) a->ob_sval;
1072 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001073 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001074 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001075 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001076 if (x == -1)
1077 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001078 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001079 return x;
1080}
1081
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001082static PyObject*
1083string_subscript(PyStringObject* self, PyObject* item)
1084{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001085 if (PyIndex_Check(item)) {
1086 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001087 if (i == -1 && PyErr_Occurred())
1088 return NULL;
1089 if (i < 0)
1090 i += PyString_GET_SIZE(self);
Guido van Rossum75a902d2007-10-19 22:06:24 +00001091 if (i < 0 || i >= PyString_GET_SIZE(self)) {
1092 PyErr_SetString(PyExc_IndexError,
1093 "string index out of range");
1094 return NULL;
1095 }
1096 return PyInt_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001097 }
1098 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001099 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001100 char* source_buf;
1101 char* result_buf;
1102 PyObject* result;
1103
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001104 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001105 PyString_GET_SIZE(self),
1106 &start, &stop, &step, &slicelength) < 0) {
1107 return NULL;
1108 }
1109
1110 if (slicelength <= 0) {
1111 return PyString_FromStringAndSize("", 0);
1112 }
Thomas Woutersed03b412007-08-28 21:37:11 +00001113 else if (start == 0 && step == 1 &&
1114 slicelength == PyString_GET_SIZE(self) &&
1115 PyString_CheckExact(self)) {
1116 Py_INCREF(self);
1117 return (PyObject *)self;
1118 }
1119 else if (step == 1) {
1120 return PyString_FromStringAndSize(
1121 PyString_AS_STRING(self) + start,
1122 slicelength);
1123 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001124 else {
1125 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001126 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001127 if (result_buf == NULL)
1128 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001129
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001130 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001131 cur += step, i++) {
1132 result_buf[i] = source_buf[cur];
1133 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001134
1135 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001136 slicelength);
1137 PyMem_Free(result_buf);
1138 return result;
1139 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001140 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001141 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001142 PyErr_Format(PyExc_TypeError,
1143 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001144 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001145 return NULL;
1146 }
1147}
1148
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001149static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001150string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001151{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001152 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +00001153}
1154
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001155static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001156 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001157 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001158 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum75a902d2007-10-19 22:06:24 +00001159 0, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +00001160 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001161 0, /*sq_ass_item*/
1162 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001163 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001164};
1165
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001166static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001167 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001168 (binaryfunc)string_subscript,
1169 0,
1170};
1171
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001172static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001173 (getbufferproc)string_buffer_getbuffer,
1174 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001175};
1176
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001177
/* Strip modes; also used as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings, indexed by the strip mode above. */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* Bare method name for strip mode i: the format string with its
   3-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001186
Thomas Wouters477c8d52006-05-27 19:21:47 +00001187
/* True if `pattern` (of `length` bytes) occurs in `target` at `offset`.
   Compares the first and last bytes before running memcmp() on the
   bytes after the first.

   Don't call if length < 2 (length-2 is passed to memcmp as a size).
   Every parameter is parenthesized so that arbitrary expressions can be
   passed; note that the arguments are still evaluated more than once,
   so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)		\
	((target)[(offset)] == (pattern)[0] &&				\
	 (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&	\
	 !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1193
1194
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The parameter is parenthesized so an expression
   argument expands correctly. */
#define PREALLOC_SIZE(maxsplit) \
	((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1207
/* Append the substring data[left:right] to the result list.

   Relies on names in the expanding function: `str` (scratch PyObject *),
   `list` (the result list), `count` (items stored so far) and an
   `onError` label that is jumped to on failure.  The first MAX_PREALLOC
   items are stored directly into the list's slots; later ones are
   appended. */
#define SPLIT_ADD(data, left, right) {					\
	str = PyString_FromStringAndSize((data) + (left),		\
					 (right) - (left));		\
	if (str == NULL)						\
		goto onError;						\
	if (count >= MAX_PREALLOC) {					\
		/* append takes its own reference; drop ours */	\
		int split_add_failed_ = PyList_Append(list, str);	\
		Py_DECREF(str);						\
		if (split_add_failed_)					\
			goto onError;					\
	}								\
	else {								\
		/* the slot takes over our reference */			\
		PyList_SET_ITEM(list, count, str);			\
	}								\
	count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Thomas Wouters477c8d52006-05-27 19:21:47 +00001227
/* Cursor helpers for the whitespace split routines.  `i` must be a
   modifiable index variable: the forward forms advance it while i < len,
   the reverse forms step it back while i >= 0.  Parameters are
   parenthesized so expression arguments expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i) < (len) && ISSPACE((s)[(i)])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i) < (len) && !ISSPACE((s)[(i)])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i) >= 0 && ISSPACE((s)[(i)])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i) >= 0 && !ISSPACE((s)[(i)])) (i)--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001232
1233Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001234split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001235{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001236 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001237 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001238 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001239
1240 if (list == NULL)
1241 return NULL;
1242
Thomas Wouters477c8d52006-05-27 19:21:47 +00001243 i = j = 0;
1244
1245 while (maxsplit-- > 0) {
1246 SKIP_SPACE(s, i, len);
1247 if (i==len) break;
1248 j = i; i++;
1249 SKIP_NONSPACE(s, i, len);
1250 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001251 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001252
1253 if (i < len) {
1254 /* Only occurs when maxsplit was reached */
1255 /* Skip any remaining whitespace and copy to end of string */
1256 SKIP_SPACE(s, i, len);
1257 if (i != len)
1258 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001259 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001260 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001261 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001262 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001263 Py_DECREF(list);
1264 return NULL;
1265}
1266
Thomas Wouters477c8d52006-05-27 19:21:47 +00001267Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001268split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001269{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001270 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001271 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001272 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001273
1274 if (list == NULL)
1275 return NULL;
1276
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277 i = j = 0;
1278 while ((j < len) && (maxcount-- > 0)) {
1279 for(; j<len; j++) {
1280 /* I found that using memchr makes no difference */
1281 if (s[j] == ch) {
1282 SPLIT_ADD(s, i, j);
1283 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001284 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001285 }
1286 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001287 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001288 if (i <= len) {
1289 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001290 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001291 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001292 return list;
1293
1294 onError:
1295 Py_DECREF(list);
1296 return NULL;
1297}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001298
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001299PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001300"S.split([sep [,maxsplit]]) -> list of strings\n\
1301\n\
1302Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001303delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001304splits are done. If sep is not specified or is None, any\n\
1305whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306
1307static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001308string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001310 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001311 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001312 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001313 PyObject *list, *str, *subobj = Py_None;
1314#ifdef USE_FAST
1315 Py_ssize_t pos;
1316#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001318 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001320 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001321 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001324 if (PyString_Check(subobj)) {
1325 sub = PyString_AS_STRING(subobj);
1326 n = PyString_GET_SIZE(subobj);
1327 }
1328 else if (PyUnicode_Check(subobj))
1329 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1330 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1331 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001332
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333 if (n == 0) {
1334 PyErr_SetString(PyExc_ValueError, "empty separator");
1335 return NULL;
1336 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001337 else if (n == 1)
1338 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339
Thomas Wouters477c8d52006-05-27 19:21:47 +00001340 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341 if (list == NULL)
1342 return NULL;
1343
Thomas Wouters477c8d52006-05-27 19:21:47 +00001344#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001346 while (maxsplit-- > 0) {
1347 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1348 if (pos < 0)
1349 break;
1350 j = i+pos;
1351 SPLIT_ADD(s, i, j);
1352 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001353 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001354#else
1355 i = j = 0;
1356 while ((j+n <= len) && (maxsplit-- > 0)) {
1357 for (; j+n <= len; j++) {
1358 if (Py_STRING_MATCH(s, j, sub, n)) {
1359 SPLIT_ADD(s, i, j);
1360 i = j = j + n;
1361 break;
1362 }
1363 }
1364 }
1365#endif
1366 SPLIT_ADD(s, i, len);
1367 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001368 return list;
1369
Thomas Wouters477c8d52006-05-27 19:21:47 +00001370 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371 Py_DECREF(list);
1372 return NULL;
1373}
1374
Thomas Wouters477c8d52006-05-27 19:21:47 +00001375PyDoc_STRVAR(partition__doc__,
1376"S.partition(sep) -> (head, sep, tail)\n\
1377\n\
1378Searches for the separator sep in S, and returns the part before it,\n\
1379the separator itself, and the part after it. If the separator is not\n\
1380found, returns S and two empty strings.");
1381
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001382static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001383string_partition(PyStringObject *self, PyObject *sep_obj)
1384{
1385 const char *sep;
1386 Py_ssize_t sep_len;
1387
1388 if (PyString_Check(sep_obj)) {
1389 sep = PyString_AS_STRING(sep_obj);
1390 sep_len = PyString_GET_SIZE(sep_obj);
1391 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001392 else if (PyUnicode_Check(sep_obj))
1393 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001394 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1395 return NULL;
1396
1397 return stringlib_partition(
1398 (PyObject*) self,
1399 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1400 sep_obj, sep, sep_len
1401 );
1402}
1403
1404PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001405"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001406\n\
1407Searches for the separator sep in S, starting at the end of S, and returns\n\
1408the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001409separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001410
1411static PyObject *
1412string_rpartition(PyStringObject *self, PyObject *sep_obj)
1413{
1414 const char *sep;
1415 Py_ssize_t sep_len;
1416
1417 if (PyString_Check(sep_obj)) {
1418 sep = PyString_AS_STRING(sep_obj);
1419 sep_len = PyString_GET_SIZE(sep_obj);
1420 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001421 else if (PyUnicode_Check(sep_obj))
1422 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001423 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1424 return NULL;
1425
1426 return stringlib_rpartition(
1427 (PyObject*) self,
1428 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1429 sep_obj, sep, sep_len
1430 );
1431}
1432
1433Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001434rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001435{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001436 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001437 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001438 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001439
1440 if (list == NULL)
1441 return NULL;
1442
Thomas Wouters477c8d52006-05-27 19:21:47 +00001443 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001444
Thomas Wouters477c8d52006-05-27 19:21:47 +00001445 while (maxsplit-- > 0) {
1446 RSKIP_SPACE(s, i);
1447 if (i<0) break;
1448 j = i; i--;
1449 RSKIP_NONSPACE(s, i);
1450 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001451 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001452 if (i >= 0) {
1453 /* Only occurs when maxsplit was reached */
1454 /* Skip any remaining whitespace and copy to beginning of string */
1455 RSKIP_SPACE(s, i);
1456 if (i >= 0)
1457 SPLIT_ADD(s, 0, i + 1);
1458
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001459 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001460 FIX_PREALLOC_SIZE(list);
1461 if (PyList_Reverse(list) < 0)
1462 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001463 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001464 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001465 Py_DECREF(list);
1466 return NULL;
1467}
1468
Thomas Wouters477c8d52006-05-27 19:21:47 +00001469Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001470rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001471{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001472 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001473 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001474 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001475
1476 if (list == NULL)
1477 return NULL;
1478
Thomas Wouters477c8d52006-05-27 19:21:47 +00001479 i = j = len - 1;
1480 while ((i >= 0) && (maxcount-- > 0)) {
1481 for (; i >= 0; i--) {
1482 if (s[i] == ch) {
1483 SPLIT_ADD(s, i + 1, j + 1);
1484 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001485 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001486 }
1487 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001488 }
1489 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001490 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001491 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001492 FIX_PREALLOC_SIZE(list);
1493 if (PyList_Reverse(list) < 0)
1494 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001495 return list;
1496
1497 onError:
1498 Py_DECREF(list);
1499 return NULL;
1500}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001501
1502PyDoc_STRVAR(rsplit__doc__,
1503"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1504\n\
1505Return a list of the words in the string S, using sep as the\n\
1506delimiter string, starting at the end of the string and working\n\
1507to the front. If maxsplit is given, at most maxsplit splits are\n\
1508done. If sep is not specified or is None, any whitespace string\n\
1509is a separator.");
1510
1511static PyObject *
1512string_rsplit(PyStringObject *self, PyObject *args)
1513{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001514 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001515 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001516 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001517 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001518
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001519 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001520 return NULL;
1521 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001522 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001523 if (subobj == Py_None)
1524 return rsplit_whitespace(s, len, maxsplit);
1525 if (PyString_Check(subobj)) {
1526 sub = PyString_AS_STRING(subobj);
1527 n = PyString_GET_SIZE(subobj);
1528 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001529 else if (PyUnicode_Check(subobj))
1530 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001531 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1532 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001533
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001534 if (n == 0) {
1535 PyErr_SetString(PyExc_ValueError, "empty separator");
1536 return NULL;
1537 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001538 else if (n == 1)
1539 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001540
Thomas Wouters477c8d52006-05-27 19:21:47 +00001541 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001542 if (list == NULL)
1543 return NULL;
1544
1545 j = len;
1546 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001547
Thomas Wouters477c8d52006-05-27 19:21:47 +00001548 while ( (i >= 0) && (maxsplit-- > 0) ) {
1549 for (; i>=0; i--) {
1550 if (Py_STRING_MATCH(s, i, sub, n)) {
1551 SPLIT_ADD(s, i + n, j);
1552 j = i;
1553 i -= n;
1554 break;
1555 }
1556 }
1557 }
1558 SPLIT_ADD(s, 0, j);
1559 FIX_PREALLOC_SIZE(list);
1560 if (PyList_Reverse(list) < 0)
1561 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001562 return list;
1563
Thomas Wouters477c8d52006-05-27 19:21:47 +00001564onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001565 Py_DECREF(list);
1566 return NULL;
1567}
1568
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001569#undef SPLIT_ADD
1570#undef MAX_PREALLOC
1571#undef PREALLOC_SIZE
1572
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001574PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575"S.join(sequence) -> string\n\
1576\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001577Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001578sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579
1580static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001581string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582{
1583 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001584 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001587 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001588 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001589 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001590 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591
Tim Peters19fe14e2001-01-19 03:03:47 +00001592 seq = PySequence_Fast(orig, "");
1593 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001594 return NULL;
1595 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001596
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001597 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001598 if (seqlen == 0) {
1599 Py_DECREF(seq);
1600 return PyString_FromString("");
1601 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001603 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001604 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1605 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001606 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001607 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001608 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001610
Raymond Hettinger674f2412004-08-23 23:23:54 +00001611 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001612 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001613 * Do a pre-pass to figure out the total amount of space we'll
1614 * need (sz), see whether any argument is absurd, and defer to
1615 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001616 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001617 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001618 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001619 item = PySequence_Fast_GET_ITEM(seq, i);
1620 if (!PyString_Check(item)){
1621 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001622 /* Defer to Unicode join.
1623 * CAUTION: There's no gurantee that the
1624 * original sequence can be iterated over
1625 * again, so we must pass seq here.
1626 */
1627 PyObject *result;
1628 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001629 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001630 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001631 }
1632 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001633 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001634 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001635 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001636 Py_DECREF(seq);
1637 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001639 sz += PyString_GET_SIZE(item);
1640 if (i != 0)
1641 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001642 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001643 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001644 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001645 Py_DECREF(seq);
1646 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001648 }
1649
1650 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001651 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001652 if (res == NULL) {
1653 Py_DECREF(seq);
1654 return NULL;
1655 }
1656
1657 /* Catenate everything. */
1658 p = PyString_AS_STRING(res);
1659 for (i = 0; i < seqlen; ++i) {
1660 size_t n;
1661 item = PySequence_Fast_GET_ITEM(seq, i);
1662 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001663 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001664 p += n;
1665 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001666 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001667 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001668 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001670
Jeremy Hylton49048292000-07-11 03:28:17 +00001671 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001673}
1674
Tim Peters52e155e2001-06-16 05:42:57 +00001675PyObject *
1676_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001677{
Tim Petersa7259592001-06-16 05:11:17 +00001678 assert(sep != NULL && PyString_Check(sep));
1679 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001680 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001681}
1682
Thomas Wouters477c8d52006-05-27 19:21:47 +00001683Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001684string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001685{
1686 if (*end > len)
1687 *end = len;
1688 else if (*end < 0)
1689 *end += len;
1690 if (*end < 0)
1691 *end = 0;
1692 if (*start < 0)
1693 *start += len;
1694 if (*start < 0)
1695 *start = 0;
1696}
1697
Thomas Wouters477c8d52006-05-27 19:21:47 +00001698Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001699string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001700{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001701 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001702 const char *sub;
1703 Py_ssize_t sub_len;
1704 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705
Thomas Wouters477c8d52006-05-27 19:21:47 +00001706 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1707 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001708 return -2;
1709 if (PyString_Check(subobj)) {
1710 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001711 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001712 }
1713 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001714 return PyUnicode_Find(
1715 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001716 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001717 /* XXX - the "expected a character buffer object" is pretty
1718 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719 return -2;
1720
Thomas Wouters477c8d52006-05-27 19:21:47 +00001721 if (dir > 0)
1722 return stringlib_find_slice(
1723 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1724 sub, sub_len, start, end);
1725 else
1726 return stringlib_rfind_slice(
1727 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1728 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729}
1730
1731
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001732PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733"S.find(sub [,start [,end]]) -> int\n\
1734\n\
1735Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001736such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737arguments start and end are interpreted as in slice notation.\n\
1738\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001739Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740
1741static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001742string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001744 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745 if (result == -2)
1746 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001747 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748}
1749
1750
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001751PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752"S.index(sub [,start [,end]]) -> int\n\
1753\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001754Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755
1756static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001757string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001759 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760 if (result == -2)
1761 return NULL;
1762 if (result == -1) {
1763 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001764 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 return NULL;
1766 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001767 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768}
1769
1770
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001771PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772"S.rfind(sub [,start [,end]]) -> int\n\
1773\n\
1774Return the highest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001775such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776arguments start and end are interpreted as in slice notation.\n\
1777\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001778Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779
1780static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001781string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001783 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784 if (result == -2)
1785 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001786 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787}
1788
1789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791"S.rindex(sub [,start [,end]]) -> int\n\
1792\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001793Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001794
1795static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001796string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001798 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799 if (result == -2)
1800 return NULL;
1801 if (result == -1) {
1802 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001803 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804 return NULL;
1805 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001806 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807}
1808
1809
Thomas Wouters477c8d52006-05-27 19:21:47 +00001810Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001811do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1812{
1813 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001814 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001815 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001816 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1817 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001818
1819 i = 0;
1820 if (striptype != RIGHTSTRIP) {
1821 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1822 i++;
1823 }
1824 }
1825
1826 j = len;
1827 if (striptype != LEFTSTRIP) {
1828 do {
1829 j--;
1830 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1831 j++;
1832 }
1833
1834 if (i == 0 && j == len && PyString_CheckExact(self)) {
1835 Py_INCREF(self);
1836 return (PyObject*)self;
1837 }
1838 else
1839 return PyString_FromStringAndSize(s+i, j-i);
1840}
1841
1842
Thomas Wouters477c8d52006-05-27 19:21:47 +00001843Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001844do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001845{
1846 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001847 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849 i = 0;
1850 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001851 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852 i++;
1853 }
1854 }
1855
1856 j = len;
1857 if (striptype != LEFTSTRIP) {
1858 do {
1859 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001860 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861 j++;
1862 }
1863
Tim Peters8fa5dd02001-09-12 02:18:30 +00001864 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865 Py_INCREF(self);
1866 return (PyObject*)self;
1867 }
1868 else
1869 return PyString_FromStringAndSize(s+i, j-i);
1870}
1871
1872
Thomas Wouters477c8d52006-05-27 19:21:47 +00001873Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001874do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1875{
1876 PyObject *sep = NULL;
1877
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001878 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001879 return NULL;
1880
1881 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001882 if (PyString_Check(sep))
1883 return do_xstrip(self, striptype, sep);
1884 else if (PyUnicode_Check(sep)) {
1885 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1886 PyObject *res;
1887 if (uniself==NULL)
1888 return NULL;
1889 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1890 striptype, sep);
1891 Py_DECREF(uniself);
1892 return res;
1893 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001894 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001895 "%s arg must be None or string",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001896 STRIPNAME(striptype));
1897 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001898 }
1899
1900 return do_strip(self, striptype);
1901}
1902
1903
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001904PyDoc_STRVAR(strip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001905"S.strip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906\n\
1907Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001908whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001909If chars is given and not None, remove characters in chars instead.\n\
1910If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911
1912static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001913string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001915 if (PyTuple_GET_SIZE(args) == 0)
1916 return do_strip(self, BOTHSTRIP); /* Common case */
1917 else
1918 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919}
1920
1921
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001922PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001923"S.lstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001925Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001926If chars is given and not None, remove characters in chars instead.\n\
1927If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928
1929static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001930string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001932 if (PyTuple_GET_SIZE(args) == 0)
1933 return do_strip(self, LEFTSTRIP); /* Common case */
1934 else
1935 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936}
1937
1938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001939PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001940"S.rstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001942Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001943If chars is given and not None, remove characters in chars instead.\n\
1944If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945
1946static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001947string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001949 if (PyTuple_GET_SIZE(args) == 0)
1950 return do_strip(self, RIGHTSTRIP); /* Common case */
1951 else
1952 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953}
1954
1955
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001956PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957"S.count(sub[, start[, end]]) -> int\n\
1958\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001959Return the number of non-overlapping occurrences of substring sub in\n\
1960string S[start:end]. Optional arguments start and end are interpreted\n\
1961as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962
1963static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001964string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001966 PyObject *sub_obj;
1967 const char *str = PyString_AS_STRING(self), *sub;
1968 Py_ssize_t sub_len;
1969 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970
Thomas Wouters477c8d52006-05-27 19:21:47 +00001971 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1972 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001974
Thomas Wouters477c8d52006-05-27 19:21:47 +00001975 if (PyString_Check(sub_obj)) {
1976 sub = PyString_AS_STRING(sub_obj);
1977 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001978 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001979 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001980 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001981 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001982 if (count == -1)
1983 return NULL;
1984 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00001985 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001986 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001987 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001988 return NULL;
1989
Thomas Wouters477c8d52006-05-27 19:21:47 +00001990 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001991
Thomas Wouters477c8d52006-05-27 19:21:47 +00001992 return PyInt_FromSsize_t(
1993 stringlib_count(str + start, end - start, sub, sub_len)
1994 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995}
1996
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001998PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999"S.translate(table [,deletechars]) -> string\n\
2000\n\
2001Return a copy of the string S, where all characters occurring\n\
2002in the optional argument deletechars are removed, and the\n\
2003remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002004translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005
2006static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002007string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002009 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002010 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002011 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002013 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002014 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015 PyObject *result;
2016 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002017 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002019 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002020 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002022
2023 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002024 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002025 tablen = PyString_GET_SIZE(tableobj);
2026 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002027 else if (tableobj == Py_None) {
2028 table = NULL;
2029 tablen = 256;
2030 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002031 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002032 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002033 parameter; instead a mapping to None will cause characters
2034 to be deleted. */
2035 if (delobj != NULL) {
2036 PyErr_SetString(PyExc_TypeError,
2037 "deletions are implemented differently for unicode");
2038 return NULL;
2039 }
2040 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2041 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002042 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044
Martin v. Löwis00b61272002-12-12 20:03:19 +00002045 if (tablen != 256) {
2046 PyErr_SetString(PyExc_ValueError,
2047 "translation table must be 256 characters long");
2048 return NULL;
2049 }
2050
Guido van Rossum4c08d552000-03-10 22:55:18 +00002051 if (delobj != NULL) {
2052 if (PyString_Check(delobj)) {
2053 del_table = PyString_AS_STRING(delobj);
2054 dellen = PyString_GET_SIZE(delobj);
2055 }
2056 else if (PyUnicode_Check(delobj)) {
2057 PyErr_SetString(PyExc_TypeError,
2058 "deletions are implemented differently for unicode");
2059 return NULL;
2060 }
2061 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2062 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 }
2064 else {
2065 del_table = NULL;
2066 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002067 }
2068
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002069 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070 result = PyString_FromStringAndSize((char *)NULL, inlen);
2071 if (result == NULL)
2072 return NULL;
2073 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002074 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075
Guido van Rossumd8faa362007-04-27 19:54:29 +00002076 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077 /* If no deletions are required, use faster code */
2078 for (i = inlen; --i >= 0; ) {
2079 c = Py_CHARMASK(*input++);
2080 if (Py_CHARMASK((*output++ = table[c])) != c)
2081 changed = 1;
2082 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002083 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084 return result;
2085 Py_DECREF(result);
2086 Py_INCREF(input_obj);
2087 return input_obj;
2088 }
2089
Guido van Rossumd8faa362007-04-27 19:54:29 +00002090 if (table == NULL) {
2091 for (i = 0; i < 256; i++)
2092 trans_table[i] = Py_CHARMASK(i);
2093 } else {
2094 for (i = 0; i < 256; i++)
2095 trans_table[i] = Py_CHARMASK(table[i]);
2096 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097
2098 for (i = 0; i < dellen; i++)
2099 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2100
2101 for (i = inlen; --i >= 0; ) {
2102 c = Py_CHARMASK(*input++);
2103 if (trans_table[c] != -1)
2104 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2105 continue;
2106 changed = 1;
2107 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002108 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109 Py_DECREF(result);
2110 Py_INCREF(input_obj);
2111 return input_obj;
2112 }
2113 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002114 if (inlen > 0)
2115 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116 return result;
2117}
2118
2119
/* Scan directions accepted by findstring() and countstring(). */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to `c` in the `target_len` bytes starting at
 * `target`; evaluates to a char* to that byte, or NULL when it is absent. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127
Thomas Wouters477c8d52006-05-27 19:21:47 +00002128/* String ops must return a string. */
2129/* If the object is subclass of string, create a copy */
2130Py_LOCAL(PyStringObject *)
2131return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002133 if (PyString_CheckExact(self)) {
2134 Py_INCREF(self);
2135 return self;
2136 }
2137 return (PyStringObject *)PyString_FromStringAndSize(
2138 PyString_AS_STRING(self),
2139 PyString_GET_SIZE(self));
2140}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141
Thomas Wouters477c8d52006-05-27 19:21:47 +00002142Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002143countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002144{
2145 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002146 const char *start=target;
2147 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148
Thomas Wouters477c8d52006-05-27 19:21:47 +00002149 while ( (start=findchar(start, end-start, c)) != NULL ) {
2150 count++;
2151 if (count >= maxcount)
2152 break;
2153 start += 1;
2154 }
2155 return count;
2156}
2157
2158Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002159findstring(const char *target, Py_ssize_t target_len,
2160 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002161 Py_ssize_t start,
2162 Py_ssize_t end,
2163 int direction)
2164{
2165 if (start < 0) {
2166 start += target_len;
2167 if (start < 0)
2168 start = 0;
2169 }
2170 if (end > target_len) {
2171 end = target_len;
2172 } else if (end < 0) {
2173 end += target_len;
2174 if (end < 0)
2175 end = 0;
2176 }
2177
2178 /* zero-length substrings always match at the first attempt */
2179 if (pattern_len == 0)
2180 return (direction > 0) ? start : end;
2181
2182 end -= pattern_len;
2183
2184 if (direction < 0) {
2185 for (; end >= start; end--)
2186 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2187 return end;
2188 } else {
2189 for (; start <= end; start++)
2190 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2191 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192 }
2193 return -1;
2194}
2195
Thomas Wouters477c8d52006-05-27 19:21:47 +00002196Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002197countstring(const char *target, Py_ssize_t target_len,
2198 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002199 Py_ssize_t start,
2200 Py_ssize_t end,
2201 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002203 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204
Thomas Wouters477c8d52006-05-27 19:21:47 +00002205 if (start < 0) {
2206 start += target_len;
2207 if (start < 0)
2208 start = 0;
2209 }
2210 if (end > target_len) {
2211 end = target_len;
2212 } else if (end < 0) {
2213 end += target_len;
2214 if (end < 0)
2215 end = 0;
2216 }
2217
2218 /* zero-length substrings match everywhere */
2219 if (pattern_len == 0 || maxcount == 0) {
2220 if (target_len+1 < maxcount)
2221 return target_len+1;
2222 return maxcount;
2223 }
2224
2225 end -= pattern_len;
2226 if (direction < 0) {
2227 for (; (end >= start); end--)
2228 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2229 count++;
2230 if (--maxcount <= 0) break;
2231 end -= pattern_len-1;
2232 }
2233 } else {
2234 for (; (start <= end); start++)
2235 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2236 count++;
2237 if (--maxcount <= 0)
2238 break;
2239 start += pattern_len-1;
2240 }
2241 }
2242 return count;
2243}
2244
2245
2246/* Algorithms for different cases of string replacement */
2247
2248/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2249Py_LOCAL(PyStringObject *)
2250replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002251 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002252 Py_ssize_t maxcount)
2253{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002254 char *self_s, *result_s;
2255 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002256 Py_ssize_t count, i, product;
2257 PyStringObject *result;
2258
2259 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002260
Thomas Wouters477c8d52006-05-27 19:21:47 +00002261 /* 1 at the end plus 1 after every character */
2262 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002263 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002264 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002265
Thomas Wouters477c8d52006-05-27 19:21:47 +00002266 /* Check for overflow */
2267 /* result_len = count * to_len + self_len; */
2268 product = count * to_len;
2269 if (product / to_len != count) {
2270 PyErr_SetString(PyExc_OverflowError,
2271 "replace string is too long");
2272 return NULL;
2273 }
2274 result_len = product + self_len;
2275 if (result_len < 0) {
2276 PyErr_SetString(PyExc_OverflowError,
2277 "replace string is too long");
2278 return NULL;
2279 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002280
Thomas Wouters477c8d52006-05-27 19:21:47 +00002281 if (! (result = (PyStringObject *)
2282 PyString_FromStringAndSize(NULL, result_len)) )
2283 return NULL;
2284
2285 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002286 result_s = PyString_AS_STRING(result);
2287
2288 /* TODO: special case single character, which doesn't need memcpy */
2289
2290 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002291 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002292 result_s += to_len;
2293 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002294
Thomas Wouters477c8d52006-05-27 19:21:47 +00002295 for (i=0; i<count; i++) {
2296 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002297 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002298 result_s += to_len;
2299 }
2300
2301 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002302 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002303
2304 return result;
2305}
2306
2307/* Special case for deleting a single character */
2308/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2309Py_LOCAL(PyStringObject *)
2310replace_delete_single_character(PyStringObject *self,
2311 char from_c, Py_ssize_t maxcount)
2312{
2313 char *self_s, *result_s;
2314 char *start, *next, *end;
2315 Py_ssize_t self_len, result_len;
2316 Py_ssize_t count;
2317 PyStringObject *result;
2318
2319 self_len = PyString_GET_SIZE(self);
2320 self_s = PyString_AS_STRING(self);
2321
2322 count = countchar(self_s, self_len, from_c, maxcount);
2323 if (count == 0) {
2324 return return_self(self);
2325 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002326
Thomas Wouters477c8d52006-05-27 19:21:47 +00002327 result_len = self_len - count; /* from_len == 1 */
2328 assert(result_len>=0);
2329
2330 if ( (result = (PyStringObject *)
2331 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2332 return NULL;
2333 result_s = PyString_AS_STRING(result);
2334
2335 start = self_s;
2336 end = self_s + self_len;
2337 while (count-- > 0) {
2338 next = findchar(start, end-start, from_c);
2339 if (next == NULL)
2340 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002341 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002342 result_s += (next-start);
2343 start = next+1;
2344 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002345 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002346
Thomas Wouters477c8d52006-05-27 19:21:47 +00002347 return result;
2348}
2349
2350/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2351
2352Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002353replace_delete_substring(PyStringObject *self,
2354 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002355 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002356 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002357 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002358 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002359 Py_ssize_t count, offset;
2360 PyStringObject *result;
2361
2362 self_len = PyString_GET_SIZE(self);
2363 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002364
2365 count = countstring(self_s, self_len,
2366 from_s, from_len,
2367 0, self_len, 1,
2368 maxcount);
2369
2370 if (count == 0) {
2371 /* no matches */
2372 return return_self(self);
2373 }
2374
2375 result_len = self_len - (count * from_len);
2376 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002377
Thomas Wouters477c8d52006-05-27 19:21:47 +00002378 if ( (result = (PyStringObject *)
2379 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2380 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002381
Thomas Wouters477c8d52006-05-27 19:21:47 +00002382 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002383
Thomas Wouters477c8d52006-05-27 19:21:47 +00002384 start = self_s;
2385 end = self_s + self_len;
2386 while (count-- > 0) {
2387 offset = findstring(start, end-start,
2388 from_s, from_len,
2389 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002390 if (offset == -1)
2391 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002392 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002393
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002394 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002395
Thomas Wouters477c8d52006-05-27 19:21:47 +00002396 result_s += (next-start);
2397 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002398 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002399 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002400 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002401}
2402
Thomas Wouters477c8d52006-05-27 19:21:47 +00002403/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2404Py_LOCAL(PyStringObject *)
2405replace_single_character_in_place(PyStringObject *self,
2406 char from_c, char to_c,
2407 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002409 char *self_s, *result_s, *start, *end, *next;
2410 Py_ssize_t self_len;
2411 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002412
Thomas Wouters477c8d52006-05-27 19:21:47 +00002413 /* The result string will be the same size */
2414 self_s = PyString_AS_STRING(self);
2415 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002416
Thomas Wouters477c8d52006-05-27 19:21:47 +00002417 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002418
Thomas Wouters477c8d52006-05-27 19:21:47 +00002419 if (next == NULL) {
2420 /* No matches; return the original string */
2421 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002423
Thomas Wouters477c8d52006-05-27 19:21:47 +00002424 /* Need to make a new string */
2425 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2426 if (result == NULL)
2427 return NULL;
2428 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002429 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002430
Thomas Wouters477c8d52006-05-27 19:21:47 +00002431 /* change everything in-place, starting with this one */
2432 start = result_s + (next-self_s);
2433 *start = to_c;
2434 start++;
2435 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002436
Thomas Wouters477c8d52006-05-27 19:21:47 +00002437 while (--maxcount > 0) {
2438 next = findchar(start, end-start, from_c);
2439 if (next == NULL)
2440 break;
2441 *next = to_c;
2442 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002443 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002444
Thomas Wouters477c8d52006-05-27 19:21:47 +00002445 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446}
2447
Thomas Wouters477c8d52006-05-27 19:21:47 +00002448/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2449Py_LOCAL(PyStringObject *)
2450replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002451 const char *from_s, Py_ssize_t from_len,
2452 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002453 Py_ssize_t maxcount)
2454{
2455 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002456 char *self_s;
2457 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002458 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002459
Thomas Wouters477c8d52006-05-27 19:21:47 +00002460 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002461
Thomas Wouters477c8d52006-05-27 19:21:47 +00002462 self_s = PyString_AS_STRING(self);
2463 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002464
Thomas Wouters477c8d52006-05-27 19:21:47 +00002465 offset = findstring(self_s, self_len,
2466 from_s, from_len,
2467 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002468 if (offset == -1) {
2469 /* No matches; return the original string */
2470 return return_self(self);
2471 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002472
Thomas Wouters477c8d52006-05-27 19:21:47 +00002473 /* Need to make a new string */
2474 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2475 if (result == NULL)
2476 return NULL;
2477 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002478 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002479
Thomas Wouters477c8d52006-05-27 19:21:47 +00002480 /* change everything in-place, starting with this one */
2481 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002482 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002483 start += from_len;
2484 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002485
Thomas Wouters477c8d52006-05-27 19:21:47 +00002486 while ( --maxcount > 0) {
2487 offset = findstring(start, end-start,
2488 from_s, from_len,
2489 0, end-start, FORWARD);
2490 if (offset==-1)
2491 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002492 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002493 start += offset+from_len;
2494 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002495
Thomas Wouters477c8d52006-05-27 19:21:47 +00002496 return result;
2497}
2498
2499/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2500Py_LOCAL(PyStringObject *)
2501replace_single_character(PyStringObject *self,
2502 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002503 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002504 Py_ssize_t maxcount)
2505{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002506 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002507 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002508 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002509 Py_ssize_t count, product;
2510 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002511
Thomas Wouters477c8d52006-05-27 19:21:47 +00002512 self_s = PyString_AS_STRING(self);
2513 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002514
Thomas Wouters477c8d52006-05-27 19:21:47 +00002515 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002516 if (count == 0) {
2517 /* no matches, return unchanged */
2518 return return_self(self);
2519 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002520
Thomas Wouters477c8d52006-05-27 19:21:47 +00002521 /* use the difference between current and new, hence the "-1" */
2522 /* result_len = self_len + count * (to_len-1) */
2523 product = count * (to_len-1);
2524 if (product / (to_len-1) != count) {
2525 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2526 return NULL;
2527 }
2528 result_len = self_len + product;
2529 if (result_len < 0) {
2530 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2531 return NULL;
2532 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002533
Thomas Wouters477c8d52006-05-27 19:21:47 +00002534 if ( (result = (PyStringObject *)
2535 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2536 return NULL;
2537 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002538
Thomas Wouters477c8d52006-05-27 19:21:47 +00002539 start = self_s;
2540 end = self_s + self_len;
2541 while (count-- > 0) {
2542 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002543 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002544 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002545
Thomas Wouters477c8d52006-05-27 19:21:47 +00002546 if (next == start) {
2547 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002548 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002549 result_s += to_len;
2550 start += 1;
2551 } else {
2552 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002553 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002554 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002555 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002556 result_s += to_len;
2557 start = next+1;
2558 }
2559 }
2560 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002561 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002562
Thomas Wouters477c8d52006-05-27 19:21:47 +00002563 return result;
2564}
2565
2566/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2567Py_LOCAL(PyStringObject *)
2568replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002569 const char *from_s, Py_ssize_t from_len,
2570 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002571 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002572 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002573 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002574 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002575 Py_ssize_t count, offset, product;
2576 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002577
Thomas Wouters477c8d52006-05-27 19:21:47 +00002578 self_s = PyString_AS_STRING(self);
2579 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002580
Thomas Wouters477c8d52006-05-27 19:21:47 +00002581 count = countstring(self_s, self_len,
2582 from_s, from_len,
2583 0, self_len, FORWARD, maxcount);
2584 if (count == 0) {
2585 /* no matches, return unchanged */
2586 return return_self(self);
2587 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002588
Thomas Wouters477c8d52006-05-27 19:21:47 +00002589 /* Check for overflow */
2590 /* result_len = self_len + count * (to_len-from_len) */
2591 product = count * (to_len-from_len);
2592 if (product / (to_len-from_len) != count) {
2593 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2594 return NULL;
2595 }
2596 result_len = self_len + product;
2597 if (result_len < 0) {
2598 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2599 return NULL;
2600 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002601
Thomas Wouters477c8d52006-05-27 19:21:47 +00002602 if ( (result = (PyStringObject *)
2603 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2604 return NULL;
2605 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002606
Thomas Wouters477c8d52006-05-27 19:21:47 +00002607 start = self_s;
2608 end = self_s + self_len;
2609 while (count-- > 0) {
2610 offset = findstring(start, end-start,
2611 from_s, from_len,
2612 0, end-start, FORWARD);
2613 if (offset == -1)
2614 break;
2615 next = start+offset;
2616 if (next == start) {
2617 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002618 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002619 result_s += to_len;
2620 start += from_len;
2621 } else {
2622 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002623 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002624 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002625 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002626 result_s += to_len;
2627 start = next+from_len;
2628 }
2629 }
2630 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002631 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002632
Thomas Wouters477c8d52006-05-27 19:21:47 +00002633 return result;
2634}
2635
2636
2637Py_LOCAL(PyStringObject *)
2638replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002639 const char *from_s, Py_ssize_t from_len,
2640 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002641 Py_ssize_t maxcount)
2642{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002643 if (maxcount < 0) {
2644 maxcount = PY_SSIZE_T_MAX;
2645 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2646 /* nothing to do; return the original string */
2647 return return_self(self);
2648 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002649
Thomas Wouters477c8d52006-05-27 19:21:47 +00002650 if (maxcount == 0 ||
2651 (from_len == 0 && to_len == 0)) {
2652 /* nothing to do; return the original string */
2653 return return_self(self);
2654 }
2655
2656 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002657
Thomas Wouters477c8d52006-05-27 19:21:47 +00002658 if (from_len == 0) {
2659 /* insert the 'to' string everywhere. */
2660 /* >>> "Python".replace("", ".") */
2661 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002662 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002663 }
2664
2665 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2666 /* point for an empty self string to generate a non-empty string */
2667 /* Special case so the remaining code always gets a non-empty string */
2668 if (PyString_GET_SIZE(self) == 0) {
2669 return return_self(self);
2670 }
2671
2672 if (to_len == 0) {
2673 /* delete all occurances of 'from' string */
2674 if (from_len == 1) {
2675 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002676 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002677 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002678 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002679 }
2680 }
2681
2682 /* Handle special case where both strings have the same length */
2683
2684 if (from_len == to_len) {
2685 if (from_len == 1) {
2686 return replace_single_character_in_place(
2687 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002688 from_s[0],
2689 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002690 maxcount);
2691 } else {
2692 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002693 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002694 }
2695 }
2696
2697 /* Otherwise use the more generic algorithms */
2698 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002699 return replace_single_character(self, from_s[0],
2700 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002701 } else {
2702 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002703 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002704 }
2705}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002706
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002707PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002708"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002709\n\
2710Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002711old replaced by new. If the optional argument count is\n\
2712given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002713
2714static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002715string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002716{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002717 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002718 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002719 const char *from_s, *to_s;
2720 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002721
Thomas Wouters477c8d52006-05-27 19:21:47 +00002722 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002723 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002724
Thomas Wouters477c8d52006-05-27 19:21:47 +00002725 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002726 from_s = PyString_AS_STRING(from);
2727 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002728 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002729 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002730 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002731 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002732 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002733 return NULL;
2734
Thomas Wouters477c8d52006-05-27 19:21:47 +00002735 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002736 to_s = PyString_AS_STRING(to);
2737 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002738 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002739 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002740 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002741 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002742 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002743 return NULL;
2744
Thomas Wouters477c8d52006-05-27 19:21:47 +00002745 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002746 from_s, from_len,
2747 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002748}
2749
Thomas Wouters477c8d52006-05-27 19:21:47 +00002750/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002751
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002752/* Matches the end (direction >= 0) or start (direction < 0) of self
2753 * against substr, using the start and end arguments. Returns
2754 * -1 on error, 0 if not found and 1 if found.
2755 */
2756Py_LOCAL(int)
2757_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2758 Py_ssize_t end, int direction)
2759{
2760 Py_ssize_t len = PyString_GET_SIZE(self);
2761 Py_ssize_t slen;
2762 const char* sub;
2763 const char* str;
2764
2765 if (PyString_Check(substr)) {
2766 sub = PyString_AS_STRING(substr);
2767 slen = PyString_GET_SIZE(substr);
2768 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002769 else if (PyUnicode_Check(substr))
2770 return PyUnicode_Tailmatch((PyObject *)self,
2771 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002772 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2773 return -1;
2774 str = PyString_AS_STRING(self);
2775
2776 string_adjust_indices(&start, &end, len);
2777
2778 if (direction < 0) {
2779 /* startswith */
2780 if (start+slen > len)
2781 return 0;
2782 } else {
2783 /* endswith */
2784 if (end-start < slen || start > len)
2785 return 0;
2786
2787 if (end-slen > start)
2788 start = end - slen;
2789 }
2790 if (end-start >= slen)
2791 return ! memcmp(str+start, sub, slen);
2792 return 0;
2793}
2794
2795
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002796PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002797"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002798\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002799Return True if S starts with the specified prefix, False otherwise.\n\
2800With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002801With optional end, stop comparing S at that position.\n\
2802prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002803
2804static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002805string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002806{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002807 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002808 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002809 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002810 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002811
Guido van Rossumc6821402000-05-08 14:08:05 +00002812 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2813 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002814 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002815 if (PyTuple_Check(subobj)) {
2816 Py_ssize_t i;
2817 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2818 result = _string_tailmatch(self,
2819 PyTuple_GET_ITEM(subobj, i),
2820 start, end, -1);
2821 if (result == -1)
2822 return NULL;
2823 else if (result) {
2824 Py_RETURN_TRUE;
2825 }
2826 }
2827 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002828 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002829 result = _string_tailmatch(self, subobj, start, end, -1);
2830 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002831 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002832 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002833 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002834}
2835
2836
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002837PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002838"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002839\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002840Return True if S ends with the specified suffix, False otherwise.\n\
2841With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002842With optional end, stop comparing S at that position.\n\
2843suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002844
2845static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002846string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002847{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002848 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002849 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002850 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002851 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002852
Guido van Rossumc6821402000-05-08 14:08:05 +00002853 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2854 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002855 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002856 if (PyTuple_Check(subobj)) {
2857 Py_ssize_t i;
2858 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2859 result = _string_tailmatch(self,
2860 PyTuple_GET_ITEM(subobj, i),
2861 start, end, +1);
2862 if (result == -1)
2863 return NULL;
2864 else if (result) {
2865 Py_RETURN_TRUE;
2866 }
2867 }
2868 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002870 result = _string_tailmatch(self, subobj, start, end, +1);
2871 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002872 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002873 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002874 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002875}
2876
2877
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002878PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002879"S.decode([encoding[,errors]]) -> object\n\
2880\n\
2881Decodes S using the codec registered for encoding. encoding defaults\n\
2882to the default encoding. errors may be given to set a different error\n\
2883handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002884a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2885as well as any other name registerd with codecs.register_error that is\n\
2886able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002887
2888static PyObject *
2889string_decode(PyStringObject *self, PyObject *args)
2890{
2891 char *encoding = NULL;
2892 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002893 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002894
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002895 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2896 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002897 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002898 if (v == NULL)
2899 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002900 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2901 PyErr_Format(PyExc_TypeError,
2902 "decoder did not return a string/unicode object "
2903 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002904 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002905 Py_DECREF(v);
2906 return NULL;
2907 }
2908 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002909
2910 onError:
2911 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002912}
2913
2914
Guido van Rossumae404e22007-10-26 21:46:44 +00002915PyDoc_STRVAR(fromhex_doc,
2916"str8.fromhex(string) -> str8\n\
2917\n\
2918Create a str8 object from a string of hexadecimal numbers.\n\
2919Spaces between two numbers are accepted. Example:\n\
2920str8.fromhex('10 1112') -> s'\\x10\\x11\\x12'.");
2921
2922static int
2923hex_digit_to_int(Py_UNICODE c)
2924{
2925 if (c >= 128)
2926 return -1;
2927 if (ISDIGIT(c))
2928 return c - '0';
2929 else {
2930 if (ISUPPER(c))
2931 c = TOLOWER(c);
2932 if (c >= 'a' && c <= 'f')
2933 return c - 'a' + 10;
2934 }
2935 return -1;
2936}
2937
2938static PyObject *
2939string_fromhex(PyObject *cls, PyObject *args)
2940{
2941 PyObject *newstring, *hexobj;
2942 char *buf;
2943 Py_UNICODE *hex;
2944 Py_ssize_t hexlen, byteslen, i, j;
2945 int top, bot;
2946
2947 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2948 return NULL;
2949 assert(PyUnicode_Check(hexobj));
2950 hexlen = PyUnicode_GET_SIZE(hexobj);
2951 hex = PyUnicode_AS_UNICODE(hexobj);
2952 byteslen = hexlen/2; /* This overestimates if there are spaces */
2953 newstring = PyString_FromStringAndSize(NULL, byteslen);
2954 if (!newstring)
2955 return NULL;
2956 buf = PyString_AS_STRING(newstring);
2957 for (i = j = 0; i < hexlen; i += 2) {
2958 /* skip over spaces in the input */
2959 while (hex[i] == ' ')
2960 i++;
2961 if (i >= hexlen)
2962 break;
2963 top = hex_digit_to_int(hex[i]);
2964 bot = hex_digit_to_int(hex[i+1]);
2965 if (top == -1 || bot == -1) {
2966 PyErr_Format(PyExc_ValueError,
2967 "non-hexadecimal number found in "
2968 "fromhex() arg at position %zd", i);
2969 goto error;
2970 }
2971 buf[j++] = (top << 4) + bot;
2972 }
2973 if (_PyString_Resize(&newstring, j) < 0)
2974 goto error;
2975 return newstring;
2976
2977 error:
2978 Py_DECREF(newstring);
2979 return NULL;
2980}
2981
2982
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002983static PyObject *
2984string_getnewargs(PyStringObject *v)
2985{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002986 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002987}
2988
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002989
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002990static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002991string_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00002992 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002993 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2994 _Py_capitalize__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002995 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002996 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002997 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002998 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002999 endswith__doc__},
3000 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3001 expandtabs__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003002 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003003 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
3004 fromhex_doc},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003005 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003006 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3007 _Py_isalnum__doc__},
3008 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3009 _Py_isalpha__doc__},
3010 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3011 _Py_isdigit__doc__},
3012 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3013 _Py_islower__doc__},
3014 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3015 _Py_isspace__doc__},
3016 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3017 _Py_istitle__doc__},
3018 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3019 _Py_isupper__doc__},
3020 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3021 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3022 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003023 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003024 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003025 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3026 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3027 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003028 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003029 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3030 rpartition__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003031 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3032 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3033 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3034 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3035 splitlines__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003036 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00003037 startswith__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003038 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003039 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3040 _Py_swapcase__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003041 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003042 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3043 translate__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003044 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003045 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003046 {NULL, NULL} /* sentinel */
3047};
3048
Jeremy Hylton938ace62002-07-17 16:30:39 +00003049static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003050str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3051
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003052static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003053string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003055 PyObject *x = NULL, *it;
3056 PyObject *(*iternext)(PyObject *);
3057 const char *encoding = NULL;
3058 const char *errors = NULL;
3059 PyObject *new = NULL;
3060 Py_ssize_t i, size;
3061 static char *kwlist[] = {"object", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003062
Guido van Rossumae960af2001-08-30 03:11:59 +00003063 if (type != &PyString_Type)
3064 return str_subtype_new(type, args, kwds);
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003065 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str8", kwlist, &x,
3066 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00003067 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003068 if (x == NULL) {
3069 if (encoding != NULL || errors != NULL) {
3070 PyErr_SetString(PyExc_TypeError,
3071 "encoding or errors without sequence "
3072 "argument");
3073 return NULL;
3074 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00003075 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003076 }
3077
3078 if (PyUnicode_Check(x)) {
3079 /* Encode via the codec registry */
3080 if (encoding == NULL) {
3081 PyErr_SetString(PyExc_TypeError,
3082 "string argument without an encoding");
3083 return NULL;
3084 }
3085 new = PyCodec_Encode(x, encoding, errors);
3086 if (new == NULL)
3087 return NULL;
3088 /* XXX(gb): must accept bytes here since codecs output bytes
3089 at the moment */
3090 if (PyBytes_Check(new)) {
3091 PyObject *str;
3092 str = PyString_FromString(PyBytes_AsString(new));
3093 Py_DECREF(new);
3094 if (!str)
3095 return NULL;
3096 return str;
3097 }
3098 if (!PyString_Check(new)) {
3099 PyErr_Format(PyExc_TypeError,
3100 "encoder did not return a str8 "
3101 "object (type=%.400s)",
3102 Py_Type(new)->tp_name);
3103 Py_DECREF(new);
3104 return NULL;
3105 }
3106 return new;
3107 }
3108
3109 /* If it's not unicode, there can't be encoding or errors */
3110 if (encoding != NULL || errors != NULL) {
3111 PyErr_SetString(PyExc_TypeError,
3112 "encoding or errors without a string argument");
3113 return NULL;
3114 }
3115
3116 /* Use the modern buffer interface */
3117 if (PyObject_CheckBuffer(x)) {
3118 Py_buffer view;
3119 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3120 return NULL;
3121 new = PyString_FromStringAndSize(NULL, view.len);
3122 if (!new)
3123 goto fail;
3124 // XXX(brett.cannon): Better way to get to internal buffer?
3125 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
3126 &view, view.len, 'C') < 0)
3127 goto fail;
3128 PyObject_ReleaseBuffer(x, &view);
3129 return new;
3130 fail:
3131 Py_XDECREF(new);
3132 PyObject_ReleaseBuffer(x, &view);
3133 return NULL;
3134 }
3135
3136 /* For the iterator version, create a string object and resize as needed. */
3137 /* XXX(gb): is 64 a good value? also, optimize this if length is known */
3138 size = 64;
3139 new = PyString_FromStringAndSize(NULL, size);
3140 if (new == NULL)
3141 return NULL;
3142
3143 /* XXX Optimize this if the arguments is a list, tuple */
3144
3145 /* Get the iterator */
3146 it = PyObject_GetIter(x);
3147 if (it == NULL)
3148 goto error;
3149 // XXX(brett.cannon): No API for this?
3150 iternext = *Py_Type(it)->tp_iternext;
3151
3152 /* Run the iterator to exhaustion */
3153 for (i = 0; ; i++) {
3154 PyObject *item;
3155 Py_ssize_t value;
3156
3157 /* Get the next item */
3158 item = iternext(it);
3159 if (item == NULL) {
3160 if (PyErr_Occurred()) {
3161 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
3162 goto error;
3163 PyErr_Clear();
3164 }
3165 break;
3166 }
3167
3168 /* Interpret it as an int (__index__) */
3169 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3170 Py_DECREF(item);
3171 if (value == -1 && PyErr_Occurred())
3172 goto error;
3173
3174 /* Range check */
3175 if (value < 0 || value >= 256) {
3176 PyErr_SetString(PyExc_ValueError,
3177 "bytes must be in range(0, 256)");
3178 goto error;
3179 }
3180
3181 /* Append the byte */
3182 if (i >= size) {
3183 size *= 2;
3184 if (_PyString_Resize(&new, size) < 0)
3185 goto error;
3186 }
3187 ((PyStringObject *)new)->ob_sval[i] = value;
3188 }
3189 _PyString_Resize(&new, i);
3190
3191 /* Clean up and return success */
3192 Py_DECREF(it);
3193 return new;
3194
3195 error:
3196 /* Error handling when it != NULL */
3197 Py_XDECREF(it);
3198 Py_DECREF(new);
3199 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003200}
3201
Guido van Rossumae960af2001-08-30 03:11:59 +00003202static PyObject *
3203str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3204{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003205 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003206 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003207
3208 assert(PyType_IsSubtype(type, &PyString_Type));
3209 tmp = string_new(&PyString_Type, args, kwds);
3210 if (tmp == NULL)
3211 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003212 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003213 n = PyString_GET_SIZE(tmp);
3214 pnew = type->tp_alloc(type, n);
3215 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003216 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003217 ((PyStringObject *)pnew)->ob_shash =
3218 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003219 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003220 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003221 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003222 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003223}
3224
Guido van Rossumcacfc072002-05-24 19:01:59 +00003225static PyObject *
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003226string_mod(PyObject *v, PyObject *w)
3227{
3228 if (!PyString_Check(v)) {
3229 Py_INCREF(Py_NotImplemented);
3230 return Py_NotImplemented;
3231 }
3232 return PyString_Format(v, w);
3233}
3234
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003235static PyNumberMethods string_as_number = {
3236 0, /*nb_add*/
3237 0, /*nb_subtract*/
3238 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003239 string_mod, /*nb_remainder*/
3240};
3241
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003242PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003243"str(object) -> string\n\
3244\n\
3245Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003246If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003247
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003248static PyObject *str_iter(PyObject *seq);
3249
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003250PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003251 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Walter Dörwald5d7a7002007-05-03 20:49:27 +00003252 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003253 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003254 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003255 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003256 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003257 0, /* tp_getattr */
3258 0, /* tp_setattr */
3259 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003260 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003261 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003262 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003263 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003264 (hashfunc)string_hash, /* tp_hash */
3265 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003266 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003267 PyObject_GenericGetAttr, /* tp_getattro */
3268 0, /* tp_setattro */
3269 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003270 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3271 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003272 string_doc, /* tp_doc */
3273 0, /* tp_traverse */
3274 0, /* tp_clear */
3275 (richcmpfunc)string_richcompare, /* tp_richcompare */
3276 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003277 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003278 0, /* tp_iternext */
3279 string_methods, /* tp_methods */
3280 0, /* tp_members */
3281 0, /* tp_getset */
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003282 &PyBaseObject_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003283 0, /* tp_dict */
3284 0, /* tp_descr_get */
3285 0, /* tp_descr_set */
3286 0, /* tp_dictoffset */
3287 0, /* tp_init */
3288 0, /* tp_alloc */
3289 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003290 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003291};
3292
3293void
Fred Drakeba096332000-07-09 07:04:36 +00003294PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003295{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003296 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003297 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003298 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003299 if (w == NULL || !PyString_Check(*pv)) {
3300 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003301 *pv = NULL;
3302 return;
3303 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003304 v = string_concat((PyStringObject *) *pv, w);
3305 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003306 *pv = v;
3307}
3308
Guido van Rossum013142a1994-08-30 08:19:36 +00003309void
Fred Drakeba096332000-07-09 07:04:36 +00003310PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003311{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003312 PyString_Concat(pv, w);
3313 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003314}
3315
3316
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003317/* The following function breaks the notion that strings are immutable:
3318 it changes the size of a string. We get away with this only if there
3319 is only one module referencing the object. You can also think of it
3320 as creating a new string object and destroying the old one, only
3321 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003322 already be known to some other part of the code...
3323 Note that if there's not enough memory to resize the string, the original
3324 string object at *pv is deallocated, *pv is set to NULL, an "out of
3325 memory" exception is set, and -1 is returned. Else (on success) 0 is
3326 returned, and the value in *pv may or may not be the same as on input.
3327 As always, an extra byte is allocated for a trailing \0 byte (newsize
3328 does *not* include that), and a trailing \0 byte is stored.
3329*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003330
3331int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003332_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003333{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003334 register PyObject *v;
3335 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003336 v = *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003337 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00003338 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003339 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003340 Py_DECREF(v);
3341 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003342 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003343 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003344 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003345 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003346 _Py_ForgetReference(v);
3347 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003348 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003349 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003350 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003351 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003352 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003353 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003354 _Py_NewReference(*pv);
3355 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003356 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003357 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003358 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003359 return 0;
3360}
Guido van Rossume5372401993-03-16 12:15:04 +00003361
3362/* Helpers for formatstring */
3363
Thomas Wouters477c8d52006-05-27 19:21:47 +00003364Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00003365getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003366{
Thomas Wouters977485d2006-02-16 15:59:12 +00003367 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003368 if (argidx < arglen) {
3369 (*p_argidx)++;
3370 if (arglen < 0)
3371 return args;
3372 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003373 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003374 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003375 PyErr_SetString(PyExc_TypeError,
3376 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003377 return NULL;
3378}
3379
/* Bit flags for format codes; the trailing comment on each line gives the
 * flag character it corresponds to.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
3392
Thomas Wouters477c8d52006-05-27 19:21:47 +00003393Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00003394formatfloat(char *buf, size_t buflen, int flags,
3395 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003396{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003397 /* fmt = '%#.' + `prec` + `type`
3398 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003399 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003400 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003401 x = PyFloat_AsDouble(v);
3402 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00003403 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003404 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003405 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003406 }
Guido van Rossume5372401993-03-16 12:15:04 +00003407 if (prec < 0)
3408 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003409 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3410 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003411 /* Worst case length calc to ensure no buffer overrun:
3412
3413 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003414 fmt = %#.<prec>g
3415 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003416 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003417 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003418
3419 'f' formats:
3420 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3421 len = 1 + 50 + 1 + prec = 52 + prec
3422
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003423 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003424 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003425
3426 */
Guido van Rossumb5a755e2007-07-18 18:15:48 +00003427 if (((type == 'g' || type == 'G') &&
3428 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003429 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003430 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003431 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003432 return -1;
3433 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003434 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3435 (flags&F_ALT) ? "#" : "",
3436 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003437 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003438 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003439}
3440
Tim Peters38fd5b62000-09-21 05:43:11 +00003441/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3442 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3443 * Python's regular ints.
3444 * Return value: a new PyString*, or NULL if error.
3445 * . *pbuf is set to point into it,
3446 * *plen set to the # of chars following that.
3447 * Caller must decref it when done using pbuf.
3448 * The string starting at *pbuf is of the form
3449 * "-"? ("0x" | "0X")? digit+
3450 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003451 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003452 * There will be at least prec digits, zero-filled on the left if
3453 * necessary to get that many.
3454 * val object to be converted
3455 * flags bitmask of format flags; only F_ALT is looked at
3456 * prec minimum number of digits; 0-fill on left if needed
3457 * type a character in [duoxX]; u acts the same as d
3458 *
3459 * CAUTION: o, x and X conversions on regular ints can never
3460 * produce a '-' sign, but can for Python's unbounded ints.
3461 */
3462PyObject*
3463_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3464 char **pbuf, int *plen)
3465{
3466 PyObject *result = NULL;
3467 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003468 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003469 int sign; /* 1 if '-', else 0 */
3470 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003471 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003472 int numdigits; /* len == numnondigits + numdigits */
3473 int numnondigits = 0;
3474
Guido van Rossumddefaf32007-01-14 03:31:43 +00003475 /* Avoid exceeding SSIZE_T_MAX */
3476 if (prec > PY_SSIZE_T_MAX-3) {
3477 PyErr_SetString(PyExc_OverflowError,
3478 "precision too large");
3479 return NULL;
3480 }
3481
Tim Peters38fd5b62000-09-21 05:43:11 +00003482 switch (type) {
3483 case 'd':
3484 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003485 /* Special-case boolean: we want 0/1 */
3486 if (PyBool_Check(val))
3487 result = PyNumber_ToBase(val, 10);
3488 else
3489 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003490 break;
3491 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003492 numnondigits = 2;
3493 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003494 break;
3495 case 'x':
3496 case 'X':
3497 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003498 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003499 break;
3500 default:
3501 assert(!"'type' not in [duoxX]");
3502 }
3503 if (!result)
3504 return NULL;
3505
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003506 buf = PyString_AsString(result);
3507 if (!buf) {
3508 Py_DECREF(result);
3509 return NULL;
3510 }
3511
Tim Peters38fd5b62000-09-21 05:43:11 +00003512 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003513 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003514 PyErr_BadInternalCall();
3515 return NULL;
3516 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00003517 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003518 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00003519 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
3520 return NULL;
3521 }
3522 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003523 if (buf[len-1] == 'L') {
3524 --len;
3525 buf[len] = '\0';
3526 }
3527 sign = buf[0] == '-';
3528 numnondigits += sign;
3529 numdigits = len - numnondigits;
3530 assert(numdigits > 0);
3531
Tim Petersfff53252001-04-12 18:38:48 +00003532 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003533 if (((flags & F_ALT) == 0 &&
3534 (type == 'o' || type == 'x' || type == 'X'))) {
3535 assert(buf[sign] == '0');
3536 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3537 buf[sign+1] == 'o');
3538 numnondigits -= 2;
3539 buf += 2;
3540 len -= 2;
3541 if (sign)
3542 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003543 assert(len == numnondigits + numdigits);
3544 assert(numdigits > 0);
3545 }
3546
3547 /* Fill with leading zeroes to meet minimum width. */
3548 if (prec > numdigits) {
3549 PyObject *r1 = PyString_FromStringAndSize(NULL,
3550 numnondigits + prec);
3551 char *b1;
3552 if (!r1) {
3553 Py_DECREF(result);
3554 return NULL;
3555 }
3556 b1 = PyString_AS_STRING(r1);
3557 for (i = 0; i < numnondigits; ++i)
3558 *b1++ = *buf++;
3559 for (i = 0; i < prec - numdigits; i++)
3560 *b1++ = '0';
3561 for (i = 0; i < numdigits; i++)
3562 *b1++ = *buf++;
3563 *b1 = '\0';
3564 Py_DECREF(result);
3565 result = r1;
3566 buf = PyString_AS_STRING(result);
3567 len = numnondigits + prec;
3568 }
3569
3570 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003571 if (type == 'X') {
3572 /* Need to convert all lower case letters to upper case.
3573 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003574 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003575 if (buf[i] >= 'a' && buf[i] <= 'x')
3576 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003577 }
3578 *pbuf = buf;
3579 *plen = len;
3580 return result;
3581}
3582
Thomas Wouters477c8d52006-05-27 19:21:47 +00003583Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00003584formatint(char *buf, size_t buflen, int flags,
3585 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003586{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003587 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003588 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3589 + 1 + 1 = 24 */
3590 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003591 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003592 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003593
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003594 x = PyInt_AsLong(v);
3595 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00003596 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003597 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003598 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003599 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003600 if (x < 0 && type == 'u') {
3601 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003602 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003603 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3604 sign = "-";
3605 else
3606 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003607 if (prec < 0)
3608 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003609
3610 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003611 (type == 'x' || type == 'X' || type == 'o')) {
3612 /* When converting under %#o, %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003613 * of issues that cause pain:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003614 * - for %#o, we want a different base marker than C
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003615 * - when 0 is being converted, the C standard leaves off
3616 * the '0x' or '0X', which is inconsistent with other
3617 * %#x/%#X conversions and inconsistent with Python's
3618 * hex() function
3619 * - there are platforms that violate the standard and
3620 * convert 0 with the '0x' or '0X'
3621 * (Metrowerks, Compaq Tru64)
3622 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003623 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003624 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003625 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003626 * We can achieve the desired consistency by inserting our
3627 * own '0x' or '0X' prefix, and substituting %x/%X in place
3628 * of %#x/%#X.
3629 *
3630 * Note that this is the same approach as used in
3631 * formatint() in unicodeobject.c
3632 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003633 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3634 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003635 }
3636 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003637 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3638 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003639 prec, type);
3640 }
3641
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003642 /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003643 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003644 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003645 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003646 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003647 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003648 return -1;
3649 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003650 if (sign[0])
3651 PyOS_snprintf(buf, buflen, fmt, -x);
3652 else
3653 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003654 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003655}
3656
Thomas Wouters477c8d52006-05-27 19:21:47 +00003657Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00003658formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003659{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003660 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003661 if (PyString_Check(v)) {
3662 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003663 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003664 }
3665 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003666 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003667 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003668 }
3669 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003670 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003671}
3672
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003673/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3674
3675 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3676 chars are formatted. XXX This is a magic number. Each formatting
3677 routine does bounds checking to ensure no overflow, but a better
3678 solution may be to malloc a buffer of appropriate size for each
3679 format. For now, the current solution is sufficient.
3680*/
3681#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003682
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003683PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003684PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003685{
3686 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003687 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003688 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00003689 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003690 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003691 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003692 PyObject *dict = NULL;
3693 if (format == NULL || !PyString_Check(format) || args == NULL) {
3694 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003695 return NULL;
3696 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003697 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003698 fmt = PyString_AS_STRING(format);
3699 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003700 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003701 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003702 if (result == NULL)
3703 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003704 res = PyString_AsString(result);
3705 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003706 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003707 argidx = 0;
3708 }
3709 else {
3710 arglen = -1;
3711 argidx = -2;
3712 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003713 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003714 !PyString_Check(args) && !PyUnicode_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003715 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003716 while (--fmtcnt >= 0) {
3717 if (*fmt != '%') {
3718 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003719 rescnt = fmtcnt + 100;
3720 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003721 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003722 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003723 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003724 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003725 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003726 }
3727 *res++ = *fmt++;
3728 }
3729 else {
3730 /* Got a format specifier */
3731 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003732 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003733 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003734 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003735 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003736 PyObject *v = NULL;
3737 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003738 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003739 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003740 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003741 char formatbuf[FORMATBUFLEN];
3742 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00003743 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003744 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003745
Guido van Rossumda9c2711996-12-05 21:58:58 +00003746 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003747 if (*fmt == '(') {
3748 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003749 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003750 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003751 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003752
3753 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003754 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003755 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003756 goto error;
3757 }
3758 ++fmt;
3759 --fmtcnt;
3760 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003761 /* Skip over balanced parentheses */
3762 while (pcount > 0 && --fmtcnt >= 0) {
3763 if (*fmt == ')')
3764 --pcount;
3765 else if (*fmt == '(')
3766 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003767 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003768 }
3769 keylen = fmt - keystart - 1;
3770 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003771 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003772 "incomplete format key");
3773 goto error;
3774 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003775 key = PyString_FromStringAndSize(keystart,
3776 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003777 if (key == NULL)
3778 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003779 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003780 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003781 args_owned = 0;
3782 }
3783 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003784 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003785 if (args == NULL) {
3786 goto error;
3787 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003788 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003789 arglen = -1;
3790 argidx = -2;
3791 }
Guido van Rossume5372401993-03-16 12:15:04 +00003792 while (--fmtcnt >= 0) {
3793 switch (c = *fmt++) {
3794 case '-': flags |= F_LJUST; continue;
3795 case '+': flags |= F_SIGN; continue;
3796 case ' ': flags |= F_BLANK; continue;
3797 case '#': flags |= F_ALT; continue;
3798 case '0': flags |= F_ZERO; continue;
3799 }
3800 break;
3801 }
3802 if (c == '*') {
3803 v = getnextarg(args, arglen, &argidx);
3804 if (v == NULL)
3805 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003806 if (!PyInt_Check(v)) {
3807 PyErr_SetString(PyExc_TypeError,
3808 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003809 goto error;
3810 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003811 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00003812 if (width == -1 && PyErr_Occurred())
3813 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003814 if (width < 0) {
3815 flags |= F_LJUST;
3816 width = -width;
3817 }
Guido van Rossume5372401993-03-16 12:15:04 +00003818 if (--fmtcnt >= 0)
3819 c = *fmt++;
3820 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003821 else if (c >= 0 && ISDIGIT(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003822 width = c - '0';
3823 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003824 c = Py_CHARMASK(*fmt++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003825 if (!ISDIGIT(c))
Guido van Rossume5372401993-03-16 12:15:04 +00003826 break;
3827 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003828 PyErr_SetString(
3829 PyExc_ValueError,
3830 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003831 goto error;
3832 }
3833 width = width*10 + (c - '0');
3834 }
3835 }
3836 if (c == '.') {
3837 prec = 0;
3838 if (--fmtcnt >= 0)
3839 c = *fmt++;
3840 if (c == '*') {
3841 v = getnextarg(args, arglen, &argidx);
3842 if (v == NULL)
3843 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003844 if (!PyInt_Check(v)) {
3845 PyErr_SetString(
3846 PyExc_TypeError,
3847 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003848 goto error;
3849 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003850 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00003851 if (prec == -1 && PyErr_Occurred())
3852 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003853 if (prec < 0)
3854 prec = 0;
3855 if (--fmtcnt >= 0)
3856 c = *fmt++;
3857 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003858 else if (c >= 0 && ISDIGIT(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003859 prec = c - '0';
3860 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003861 c = Py_CHARMASK(*fmt++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003862 if (!ISDIGIT(c))
Guido van Rossume5372401993-03-16 12:15:04 +00003863 break;
3864 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003865 PyErr_SetString(
3866 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003867 "prec too big");
3868 goto error;
3869 }
3870 prec = prec*10 + (c - '0');
3871 }
3872 }
3873 } /* prec */
3874 if (fmtcnt >= 0) {
3875 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003876 if (--fmtcnt >= 0)
3877 c = *fmt++;
3878 }
3879 }
3880 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003881 PyErr_SetString(PyExc_ValueError,
3882 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003883 goto error;
3884 }
3885 if (c != '%') {
3886 v = getnextarg(args, arglen, &argidx);
3887 if (v == NULL)
3888 goto error;
3889 }
3890 sign = 0;
3891 fill = ' ';
3892 switch (c) {
3893 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003894 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003895 len = 1;
3896 break;
3897 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00003898 if (PyUnicode_Check(v)) {
3899 fmt = fmt_start;
3900 argidx = argidx_start;
3901 goto unicode;
3902 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00003903 temp = _PyObject_Str(v);
3904 if (temp != NULL && PyUnicode_Check(temp)) {
3905 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00003906 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003907 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003908 goto unicode;
3909 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00003910 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00003911 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00003912 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00003913 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003914 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003915 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003916 if (!PyString_Check(temp)) {
3917 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003918 "%s argument has non-string str()/repr()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003919 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003920 goto error;
3921 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003922 pbuf = PyString_AS_STRING(temp);
3923 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003924 if (prec >= 0 && len > prec)
3925 len = prec;
3926 break;
3927 case 'i':
3928 case 'd':
3929 case 'u':
3930 case 'o':
3931 case 'x':
3932 case 'X':
3933 if (c == 'i')
3934 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003935 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00003936 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003937 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00003938 prec, c, &pbuf, &ilen);
3939 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003940 if (!temp)
3941 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00003942 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003943 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003944 else {
3945 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003946 len = formatint(pbuf,
3947 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003948 flags, prec, c, v);
3949 if (len < 0)
3950 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003951 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003952 }
3953 if (flags & F_ZERO)
3954 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003955 break;
3956 case 'e':
3957 case 'E':
3958 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00003959 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00003960 case 'g':
3961 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00003962 if (c == 'F')
3963 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003964 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003965 len = formatfloat(pbuf, sizeof(formatbuf),
3966 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003967 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003968 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003969 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003970 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003971 fill = '0';
3972 break;
3973 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00003974 if (PyUnicode_Check(v)) {
3975 fmt = fmt_start;
3976 argidx = argidx_start;
3977 goto unicode;
3978 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003979 pbuf = formatbuf;
3980 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003981 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003982 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003983 break;
3984 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003985 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003986 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00003987 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00003988 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00003989 (Py_ssize_t)(fmt - 1 -
3990 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003991 goto error;
3992 }
3993 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003994 if (*pbuf == '-' || *pbuf == '+') {
3995 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003996 len--;
3997 }
3998 else if (flags & F_SIGN)
3999 sign = '+';
4000 else if (flags & F_BLANK)
4001 sign = ' ';
4002 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004003 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004004 }
4005 if (width < len)
4006 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004007 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004008 reslen -= rescnt;
4009 rescnt = width + fmtcnt + 100;
4010 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004011 if (reslen < 0) {
4012 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004013 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004014 return PyErr_NoMemory();
4015 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004016 if (_PyString_Resize(&result, reslen) < 0) {
4017 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004018 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004019 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004020 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004021 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004022 }
4023 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004024 if (fill != ' ')
4025 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004026 rescnt--;
4027 if (width > len)
4028 width--;
4029 }
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004030 if ((flags & F_ALT) &&
4031 (c == 'x' || c == 'X' || c == 'o')) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004032 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004033 assert(pbuf[1] == c);
4034 if (fill != ' ') {
4035 *res++ = *pbuf++;
4036 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004037 }
Tim Petersfff53252001-04-12 18:38:48 +00004038 rescnt -= 2;
4039 width -= 2;
4040 if (width < 0)
4041 width = 0;
4042 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004043 }
4044 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004045 do {
4046 --rescnt;
4047 *res++ = fill;
4048 } while (--width > len);
4049 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004050 if (fill == ' ') {
4051 if (sign)
4052 *res++ = sign;
4053 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004054 (c == 'x' || c == 'X' || c == 'o')) {
Tim Petersfff53252001-04-12 18:38:48 +00004055 assert(pbuf[0] == '0');
4056 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004057 *res++ = *pbuf++;
4058 *res++ = *pbuf++;
4059 }
4060 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004061 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004062 res += len;
4063 rescnt -= len;
4064 while (--width >= len) {
4065 --rescnt;
4066 *res++ = ' ';
4067 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004068 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004069 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004070 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004071 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004072 goto error;
4073 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004074 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004075 } /* '%' */
4076 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004077 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004078 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004079 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004080 goto error;
4081 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004082 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004083 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004085 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004086 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004087
4088 unicode:
4089 if (args_owned) {
4090 Py_DECREF(args);
4091 args_owned = 0;
4092 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004093 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004094 if (PyTuple_Check(orig_args) && argidx > 0) {
4095 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004096 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004097 v = PyTuple_New(n);
4098 if (v == NULL)
4099 goto error;
4100 while (--n >= 0) {
4101 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4102 Py_INCREF(w);
4103 PyTuple_SET_ITEM(v, n, w);
4104 }
4105 args = v;
4106 } else {
4107 Py_INCREF(orig_args);
4108 args = orig_args;
4109 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004110 args_owned = 1;
4111 /* Take what we have of the result and let the Unicode formatting
4112 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004113 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004114 if (_PyString_Resize(&result, rescnt))
4115 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004116 fmtcnt = PyString_GET_SIZE(format) - \
4117 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004118 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4119 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004120 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004121 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004122 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004123 if (v == NULL)
4124 goto error;
4125 /* Paste what we have (result) to what the Unicode formatting
4126 function returned (v) and return the result (or error) */
4127 w = PyUnicode_Concat(result, v);
4128 Py_DECREF(result);
4129 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004130 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004131 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004132
Guido van Rossume5372401993-03-16 12:15:04 +00004133 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004134 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004135 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004136 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004137 }
Guido van Rossume5372401993-03-16 12:15:04 +00004138 return NULL;
4139}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004140
Guido van Rossum2a61e741997-01-18 07:55:05 +00004141void
Fred Drakeba096332000-07-09 07:04:36 +00004142PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004143{
4144 register PyStringObject *s = (PyStringObject *)(*p);
4145 PyObject *t;
4146 if (s == NULL || !PyString_Check(s))
4147 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004148 /* If it's a string subclass, we don't really know what putting
4149 it in the interned dict might do. */
4150 if (!PyString_CheckExact(s))
4151 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004152 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004153 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004154 if (interned == NULL) {
4155 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004156 if (interned == NULL) {
4157 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004158 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004159 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004160 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004161 t = PyDict_GetItem(interned, (PyObject *)s);
4162 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004163 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004164 Py_DECREF(*p);
4165 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004166 return;
4167 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004168
Armin Rigo79f7ad22004-08-07 19:27:39 +00004169 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004170 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004171 return;
4172 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004173 /* The two references in interned are not counted by refcnt.
4174 The string deallocator will take care of this */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004175 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004176 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004177}
4178
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004179void
4180PyString_InternImmortal(PyObject **p)
4181{
4182 PyString_InternInPlace(p);
4183 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4184 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4185 Py_INCREF(*p);
4186 }
4187}
4188
Guido van Rossum2a61e741997-01-18 07:55:05 +00004189
4190PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004191PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004192{
4193 PyObject *s = PyString_FromString(cp);
4194 if (s == NULL)
4195 return NULL;
4196 PyString_InternInPlace(&s);
4197 return s;
4198}
4199
Guido van Rossum8cf04761997-08-02 02:57:45 +00004200void
Fred Drakeba096332000-07-09 07:04:36 +00004201PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004202{
4203 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004204 for (i = 0; i < UCHAR_MAX + 1; i++) {
4205 Py_XDECREF(characters[i]);
4206 characters[i] = NULL;
4207 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004208 Py_XDECREF(nullstring);
4209 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004210}
Barry Warsawa903ad982001-02-23 16:40:48 +00004211
Barry Warsawa903ad982001-02-23 16:40:48 +00004212void _Py_ReleaseInternedStrings(void)
4213{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004214 PyObject *keys;
4215 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004216 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004217 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004218
4219 if (interned == NULL || !PyDict_Check(interned))
4220 return;
4221 keys = PyDict_Keys(interned);
4222 if (keys == NULL || !PyList_Check(keys)) {
4223 PyErr_Clear();
4224 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004225 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004226
4227 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4228 detector, interned strings are not forcibly deallocated; rather, we
4229 give them their stolen references back, and then clear and DECREF
4230 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004231
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004232 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004233 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4234 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004235 for (i = 0; i < n; i++) {
4236 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4237 switch (s->ob_sstate) {
4238 case SSTATE_NOT_INTERNED:
4239 /* XXX Shouldn't happen */
4240 break;
4241 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004242 Py_Refcnt(s) += 1;
4243 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004244 break;
4245 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004246 Py_Refcnt(s) += 2;
4247 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004248 break;
4249 default:
4250 Py_FatalError("Inconsistent interned string state.");
4251 }
4252 s->ob_sstate = SSTATE_NOT_INTERNED;
4253 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004254 fprintf(stderr, "total size of all interned strings: "
4255 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4256 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004257 Py_DECREF(keys);
4258 PyDict_Clear(interned);
4259 Py_DECREF(interned);
4260 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004261}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004262
4263
4264/*********************** Str Iterator ****************************/
4265
/* Iterator state for walking over the bytes of a string object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* Index into it_seq of the next item to return */
    PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
4271
4272static void
4273striter_dealloc(striterobject *it)
4274{
4275 _PyObject_GC_UNTRACK(it);
4276 Py_XDECREF(it->it_seq);
4277 PyObject_GC_Del(it);
4278}
4279
4280static int
4281striter_traverse(striterobject *it, visitproc visit, void *arg)
4282{
4283 Py_VISIT(it->it_seq);
4284 return 0;
4285}
4286
4287static PyObject *
4288striter_next(striterobject *it)
4289{
4290 PyStringObject *seq;
4291 PyObject *item;
4292
4293 assert(it != NULL);
4294 seq = it->it_seq;
4295 if (seq == NULL)
4296 return NULL;
4297 assert(PyString_Check(seq));
4298
4299 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +00004300 item = PyInt_FromLong(
4301 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004302 if (item != NULL)
4303 ++it->it_index;
4304 return item;
4305 }
4306
4307 Py_DECREF(seq);
4308 it->it_seq = NULL;
4309 return NULL;
4310}
4311
4312static PyObject *
4313striter_len(striterobject *it)
4314{
4315 Py_ssize_t len = 0;
4316 if (it->it_seq)
4317 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
4318 return PyInt_FromSsize_t(len);
4319}
4320
/* Docstring attached to the "__length_hint__" entry of striter_methods. */
PyDoc_STRVAR(length_hint_doc,
             "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004323
/* Method table for the string iterator type; it exposes only
   __length_hint__, implemented by striter_len(). */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {NULL,              NULL}           /* sentinel */
};
4329
/* Type object for striterobject ("striterator").  Instances take part
   in cyclic GC (Py_TPFLAGS_HAVE_GC with striter_traverse), return
   themselves from tp_iter and produce items through striter_next(). */
PyTypeObject PyStringIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "striterator",                      /* tp_name */
    sizeof(striterobject),              /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,        /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)striter_next,         /* tp_iternext */
    striter_methods,                    /* tp_methods */
    0,                                  /* tp_members */
};
4362
4363static PyObject *
4364str_iter(PyObject *seq)
4365{
4366 striterobject *it;
4367
4368 if (!PyString_Check(seq)) {
4369 PyErr_BadInternalCall();
4370 return NULL;
4371 }
4372 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
4373 if (it == NULL)
4374 return NULL;
4375 it->it_index = 0;
4376 Py_INCREF(seq);
4377 it->it_seq = (PyStringObject *)seq;
4378 _PyObject_GC_TRACK(it);
4379 return (PyObject *)it;
4380}