blob: 85883431eefa56958ad79c3c19fbfc93d783abbb [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
   Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
   For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Gregory P. Smith14acde32008-04-09 23:41:13 +000056 if (size < 0) {
57 PyErr_SetString(PyExc_SystemError,
58 "Negative size passed to PyString_FromStringAndSize");
59 return NULL;
60 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 if (size == 0 && (op = nullstring) != NULL) {
62#ifdef COUNT_ALLOCS
63 null_strings++;
64#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 Py_INCREF(op);
66 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 if (size == 1 && str != NULL &&
69 (op = characters[*str & UCHAR_MAX]) != NULL)
70 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071#ifdef COUNT_ALLOCS
72 one_strings++;
73#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000074 Py_INCREF(op);
75 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000076 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000077
Neal Norwitz4f3be8a2008-07-31 17:08:14 +000078 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
79 PyErr_SetString(PyExc_OverflowError, "string is too large");
80 return NULL;
81 }
82
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000083 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000084 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000085 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000087 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000089 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000090 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000091 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000092 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000093 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +0000101 PyObject *t = (PyObject *)op;
102 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000103 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000104 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000108}
109
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000111PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112{
Tim Peters62de65b2001-12-06 20:29:32 +0000113 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000114 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000115
116 assert(str != NULL);
117 size = strlen(str);
Neal Norwitz4f3be8a2008-07-31 17:08:14 +0000118 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000119 PyErr_SetString(PyExc_OverflowError,
120 "string is too long for a Python string");
121 return NULL;
122 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 if (size == 0 && (op = nullstring) != NULL) {
124#ifdef COUNT_ALLOCS
125 null_strings++;
126#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000127 Py_INCREF(op);
128 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000129 }
130 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
131#ifdef COUNT_ALLOCS
132 one_strings++;
133#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000138 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000139 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000140 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000142 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000144 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000145 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000146 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000148 PyObject *t = (PyObject *)op;
149 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000150 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000153 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000154 PyObject *t = (PyObject *)op;
155 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000156 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000157 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000158 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000159 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000160 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000161}
162
Barry Warsawdadace02001-08-24 18:32:06 +0000163PyObject *
164PyString_FromFormatV(const char *format, va_list vargs)
165{
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000167 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000168 const char* f;
169 char *s;
170 PyObject* string;
171
Tim Petersc15c4f12001-10-02 21:32:07 +0000172#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000173 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000174#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#ifdef __va_copy
176 __va_copy(count, vargs);
177#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000178 count = vargs;
179#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000180#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000181 /* step 1: figure out how large a buffer we need */
182 for (f = format; *f; f++) {
183 if (*f == '%') {
184 const char* p = f;
185 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
186 ;
187
Tim Peters8931ff12006-05-13 23:28:20 +0000188 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
189 * they don't affect the amount of space we reserve.
190 */
191 if ((*f == 'l' || *f == 'z') &&
192 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000193 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000194
Barry Warsawdadace02001-08-24 18:32:06 +0000195 switch (*f) {
196 case 'c':
197 (void)va_arg(count, int);
198 /* fall through... */
199 case '%':
200 n++;
201 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000202 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000203 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000204 /* 20 bytes is enough to hold a 64-bit
205 integer. Decimal takes the most space.
206 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000207 n += 20;
208 break;
209 case 's':
210 s = va_arg(count, char*);
211 n += strlen(s);
212 break;
213 case 'p':
214 (void) va_arg(count, int);
215 /* maximum 64-bit pointer representation:
216 * 0xffffffffffffffff
217 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000218 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000219 */
220 n += 19;
221 break;
222 default:
223 /* if we stumble upon an unknown
224 formatting code, copy the rest of
225 the format string to the output
226 string. (we cannot just skip the
227 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000228 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000229 n += strlen(p);
230 goto expand;
231 }
232 } else
233 n++;
234 }
235 expand:
236 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000237 /* Since we've analyzed how much space we need for the worst case,
238 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000239 string = PyString_FromStringAndSize(NULL, n);
240 if (!string)
241 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000242
Barry Warsawdadace02001-08-24 18:32:06 +0000243 s = PyString_AsString(string);
244
245 for (f = format; *f; f++) {
246 if (*f == '%') {
247 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000248 Py_ssize_t i;
249 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000250 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000251 /* parse the width.precision part (we're only
252 interested in the precision value, if any) */
253 n = 0;
254 while (isdigit(Py_CHARMASK(*f)))
255 n = (n*10) + *f++ - '0';
256 if (*f == '.') {
257 f++;
258 n = 0;
259 while (isdigit(Py_CHARMASK(*f)))
260 n = (n*10) + *f++ - '0';
261 }
262 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
263 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000264 /* handle the long flag, but only for %ld and %lu.
265 others can be added when necessary. */
266 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000267 longflag = 1;
268 ++f;
269 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000270 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000271 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000272 size_tflag = 1;
273 ++f;
274 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000275
Barry Warsawdadace02001-08-24 18:32:06 +0000276 switch (*f) {
277 case 'c':
278 *s++ = va_arg(vargs, int);
279 break;
280 case 'd':
281 if (longflag)
282 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000283 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000284 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
285 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000286 else
287 sprintf(s, "%d", va_arg(vargs, int));
288 s += strlen(s);
289 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000290 case 'u':
291 if (longflag)
292 sprintf(s, "%lu",
293 va_arg(vargs, unsigned long));
294 else if (size_tflag)
295 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
296 va_arg(vargs, size_t));
297 else
298 sprintf(s, "%u",
299 va_arg(vargs, unsigned int));
300 s += strlen(s);
301 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000302 case 'i':
303 sprintf(s, "%i", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'x':
307 sprintf(s, "%x", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 's':
311 p = va_arg(vargs, char*);
312 i = strlen(p);
313 if (n > 0 && i > n)
314 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000315 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000316 s += i;
317 break;
318 case 'p':
319 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000320 /* %p is ill-defined: ensure leading 0x. */
321 if (s[1] == 'X')
322 s[1] = 'x';
323 else if (s[1] != 'x') {
324 memmove(s+2, s, strlen(s)+1);
325 s[0] = '0';
326 s[1] = 'x';
327 }
Barry Warsawdadace02001-08-24 18:32:06 +0000328 s += strlen(s);
329 break;
330 case '%':
331 *s++ = '%';
332 break;
333 default:
334 strcpy(s, p);
335 s += strlen(s);
336 goto end;
337 }
338 } else
339 *s++ = *f;
340 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000341
Barry Warsawdadace02001-08-24 18:32:06 +0000342 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000343 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000344 return string;
345}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000346
Barry Warsawdadace02001-08-24 18:32:06 +0000347PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000348PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000349{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000351 va_list vargs;
352
353#ifdef HAVE_STDARG_PROTOTYPES
354 va_start(vargs, format);
355#else
356 va_start(vargs);
357#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000358 ret = PyString_FromFormatV(format, vargs);
359 va_end(vargs);
360 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000361}
362
363
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000366 const char *encoding,
367 const char *errors)
368{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000369 PyObject *v, *str;
370
371 str = PyString_FromStringAndSize(s, size);
372 if (str == NULL)
373 return NULL;
374 v = PyString_AsDecodedString(str, encoding, errors);
375 Py_DECREF(str);
376 return v;
377}
378
379PyObject *PyString_AsDecodedObject(PyObject *str,
380 const char *encoding,
381 const char *errors)
382{
383 PyObject *v;
384
385 if (!PyString_Check(str)) {
386 PyErr_BadArgument();
387 goto onError;
388 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000389
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000390 if (encoding == NULL) {
391#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#else
394 PyErr_SetString(PyExc_ValueError, "no encoding specified");
395 goto onError;
396#endif
397 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398
399 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000400 v = PyCodec_Decode(str, encoding, errors);
401 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000402 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403
404 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000405
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000406 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000407 return NULL;
408}
409
410PyObject *PyString_AsDecodedString(PyObject *str,
411 const char *encoding,
412 const char *errors)
413{
414 PyObject *v;
415
416 v = PyString_AsDecodedObject(str, encoding, errors);
417 if (v == NULL)
418 goto onError;
419
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 /* Convert Unicode to a string using the default encoding */
422 if (PyUnicode_Check(v)) {
423 PyObject *temp = v;
424 v = PyUnicode_AsEncodedString(v, NULL, NULL);
425 Py_DECREF(temp);
426 if (v == NULL)
427 goto onError;
428 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000429#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000430 if (!PyString_Check(v)) {
431 PyErr_Format(PyExc_TypeError,
432 "decoder did not return a string object (type=%.400s)",
433 v->ob_type->tp_name);
434 Py_DECREF(v);
435 goto onError;
436 }
437
438 return v;
439
440 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 return NULL;
442}
443
444PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000445 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446 const char *encoding,
447 const char *errors)
448{
449 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000450
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 str = PyString_FromStringAndSize(s, size);
452 if (str == NULL)
453 return NULL;
454 v = PyString_AsEncodedString(str, encoding, errors);
455 Py_DECREF(str);
456 return v;
457}
458
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000459PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000460 const char *encoding,
461 const char *errors)
462{
463 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000464
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000465 if (!PyString_Check(str)) {
466 PyErr_BadArgument();
467 goto onError;
468 }
469
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000470 if (encoding == NULL) {
471#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000472 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#else
474 PyErr_SetString(PyExc_ValueError, "no encoding specified");
475 goto onError;
476#endif
477 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478
479 /* Encode via the codec registry */
480 v = PyCodec_Encode(str, encoding, errors);
481 if (v == NULL)
482 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000483
484 return v;
485
486 onError:
487 return NULL;
488}
489
490PyObject *PyString_AsEncodedString(PyObject *str,
491 const char *encoding,
492 const char *errors)
493{
494 PyObject *v;
495
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000496 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000497 if (v == NULL)
498 goto onError;
499
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 /* Convert Unicode to a string using the default encoding */
502 if (PyUnicode_Check(v)) {
503 PyObject *temp = v;
504 v = PyUnicode_AsEncodedString(v, NULL, NULL);
505 Py_DECREF(temp);
506 if (v == NULL)
507 goto onError;
508 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000509#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 if (!PyString_Check(v)) {
511 PyErr_Format(PyExc_TypeError,
512 "encoder did not return a string object (type=%.400s)",
513 v->ob_type->tp_name);
514 Py_DECREF(v);
515 goto onError;
516 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000517
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000518 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000519
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000520 onError:
521 return NULL;
522}
523
Guido van Rossum234f9421993-06-17 12:35:49 +0000524static void
Fred Drakeba096332000-07-09 07:04:36 +0000525string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000526{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000527 switch (PyString_CHECK_INTERNED(op)) {
528 case SSTATE_NOT_INTERNED:
529 break;
530
531 case SSTATE_INTERNED_MORTAL:
532 /* revive dead object temporarily for DelItem */
533 op->ob_refcnt = 3;
534 if (PyDict_DelItem(interned, op) != 0)
535 Py_FatalError(
536 "deletion of interned string failed");
537 break;
538
539 case SSTATE_INTERNED_IMMORTAL:
540 Py_FatalError("Immortal interned string died.");
541
542 default:
543 Py_FatalError("Inconsistent interned string state.");
544 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000545 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000546}
547
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548/* Unescape a backslash-escaped string. If unicode is non-zero,
549 the string is a u-literal. If recode_encoding is non-zero,
550 the string is UTF-8 encoded and should be re-encoded in the
551 specified encoding. */
552
553PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000555 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000556 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000557 const char *recode_encoding)
558{
559 int c;
560 char *p, *buf;
561 const char *end;
562 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000563 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000564 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000565 if (v == NULL)
566 return NULL;
567 p = buf = PyString_AsString(v);
568 end = s + len;
569 while (s < end) {
570 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000571 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000572#ifdef Py_USING_UNICODE
573 if (recode_encoding && (*s & 0x80)) {
574 PyObject *u, *w;
575 char *r;
576 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000577 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000578 t = s;
579 /* Decode non-ASCII bytes as UTF-8. */
580 while (t < end && (*t & 0x80)) t++;
581 u = PyUnicode_DecodeUTF8(s, t - s, errors);
582 if(!u) goto failed;
583
584 /* Recode them in target encoding. */
585 w = PyUnicode_AsEncodedString(
586 u, recode_encoding, errors);
587 Py_DECREF(u);
588 if (!w) goto failed;
589
590 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000591 assert(PyString_Check(w));
592 r = PyString_AS_STRING(w);
593 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000594 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000595 p += rn;
596 Py_DECREF(w);
597 s = t;
598 } else {
599 *p++ = *s++;
600 }
601#else
602 *p++ = *s++;
603#endif
604 continue;
605 }
606 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000607 if (s==end) {
608 PyErr_SetString(PyExc_ValueError,
609 "Trailing \\ in string");
610 goto failed;
611 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000612 switch (*s++) {
613 /* XXX This assumes ASCII! */
614 case '\n': break;
615 case '\\': *p++ = '\\'; break;
616 case '\'': *p++ = '\''; break;
617 case '\"': *p++ = '\"'; break;
618 case 'b': *p++ = '\b'; break;
619 case 'f': *p++ = '\014'; break; /* FF */
620 case 't': *p++ = '\t'; break;
621 case 'n': *p++ = '\n'; break;
622 case 'r': *p++ = '\r'; break;
623 case 'v': *p++ = '\013'; break; /* VT */
624 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
625 case '0': case '1': case '2': case '3':
626 case '4': case '5': case '6': case '7':
627 c = s[-1] - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000628 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 c = (c<<3) + *s++ - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000630 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 c = (c<<3) + *s++ - '0';
632 }
633 *p++ = c;
634 break;
635 case 'x':
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000636 if (s+1 < end &&
637 isxdigit(Py_CHARMASK(s[0])) &&
638 isxdigit(Py_CHARMASK(s[1])))
639 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 unsigned int x = 0;
641 c = Py_CHARMASK(*s);
642 s++;
643 if (isdigit(c))
644 x = c - '0';
645 else if (islower(c))
646 x = 10 + c - 'a';
647 else
648 x = 10 + c - 'A';
649 x = x << 4;
650 c = Py_CHARMASK(*s);
651 s++;
652 if (isdigit(c))
653 x += c - '0';
654 else if (islower(c))
655 x += 10 + c - 'a';
656 else
657 x += 10 + c - 'A';
658 *p++ = x;
659 break;
660 }
661 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000662 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666 if (strcmp(errors, "replace") == 0) {
667 *p++ = '?';
668 } else if (strcmp(errors, "ignore") == 0)
669 /* do nothing */;
670 else {
671 PyErr_Format(PyExc_ValueError,
672 "decoding error; "
673 "unknown error handling code: %.400s",
674 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000675 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677#ifndef Py_USING_UNICODE
678 case 'u':
679 case 'U':
680 case 'N':
681 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000682 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 "Unicode escapes not legal "
684 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000685 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 }
687#endif
688 default:
689 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000690 s--;
691 goto non_esc; /* an arbitry number of unescaped
692 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000693 }
694 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000695 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000696 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000697 return v;
698 failed:
699 Py_DECREF(v);
700 return NULL;
701}
702
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000703/* -------------------------------------------------------------------- */
704/* object api */
705
Martin v. Löwis18e16552006-02-15 17:27:45 +0000706static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707string_getsize(register PyObject *op)
708{
709 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000710 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000711 if (PyString_AsStringAndSize(op, &s, &len))
712 return -1;
713 return len;
714}
715
716static /*const*/ char *
717string_getbuffer(register PyObject *op)
718{
719 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000720 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (PyString_AsStringAndSize(op, &s, &len))
722 return NULL;
723 return s;
724}
725
Martin v. Löwis18e16552006-02-15 17:27:45 +0000726Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000727PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000728{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 if (!PyString_Check(op))
730 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000731 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732}
733
734/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000735PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000736{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737 if (!PyString_Check(op))
738 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000739 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000740}
741
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000742int
743PyString_AsStringAndSize(register PyObject *obj,
744 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000745 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000746{
747 if (s == NULL) {
748 PyErr_BadInternalCall();
749 return -1;
750 }
751
752 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000753#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000754 if (PyUnicode_Check(obj)) {
755 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
756 if (obj == NULL)
757 return -1;
758 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000759 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000760#endif
761 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_Format(PyExc_TypeError,
763 "expected string or Unicode object, "
764 "%.200s found", obj->ob_type->tp_name);
765 return -1;
766 }
767 }
768
769 *s = PyString_AS_STRING(obj);
770 if (len != NULL)
771 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000772 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000773 PyErr_SetString(PyExc_TypeError,
774 "expected string without null bytes");
775 return -1;
776 }
777 return 0;
778}
779
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000781/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
Fredrik Lundha50d2012006-05-26 17:04:58 +0000783#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000784
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000785#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000786#define STRINGLIB_LEN PyString_GET_SIZE
787#define STRINGLIB_NEW PyString_FromStringAndSize
788#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000789
Fredrik Lundhb9479482006-05-26 17:22:38 +0000790#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000791
Fredrik Lundha50d2012006-05-26 17:04:58 +0000792#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000793
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000794#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000795#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000796#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000797
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000798
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000799static int
Fred Drakeba096332000-07-09 07:04:36 +0000800string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000801{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000802 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000803 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000804 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000805
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000806 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000807 if (! PyString_CheckExact(op)) {
808 int ret;
809 /* A str subclass may have its own __str__ method. */
810 op = (PyStringObject *) PyObject_Str((PyObject *)op);
811 if (op == NULL)
812 return -1;
813 ret = string_print(op, fp, flags);
814 Py_DECREF(op);
815 return ret;
816 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000817 if (flags & Py_PRINT_RAW) {
Armin Rigo4b63c212006-10-04 11:44:06 +0000818 char *data = op->ob_sval;
819 Py_ssize_t size = op->ob_size;
820 while (size > INT_MAX) {
821 /* Very long strings cannot be written atomically.
822 * But don't write exactly INT_MAX bytes at a time
823 * to avoid memory aligment issues.
824 */
825 const int chunk_size = INT_MAX & ~0x3FFF;
826 fwrite(data, 1, chunk_size, fp);
827 data += chunk_size;
828 size -= chunk_size;
829 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000830#ifdef __VMS
Armin Rigo4b63c212006-10-04 11:44:06 +0000831 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000832#else
Armin Rigo4b63c212006-10-04 11:44:06 +0000833 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000834#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000835 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837
Thomas Wouters7e474022000-07-16 12:04:32 +0000838 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000840 if (memchr(op->ob_sval, '\'', op->ob_size) &&
841 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000842 quote = '"';
843
844 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845 for (i = 0; i < op->ob_size; i++) {
846 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000847 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000851 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000852 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000853 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000854 fprintf(fp, "\\r");
855 else if (c < ' ' || c >= 0x7f)
856 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000857 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000858 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000859 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000860 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000861 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000862}
863
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864PyObject *
865PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000867 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000868 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000869 PyObject *v;
Armin Rigo4b63c212006-10-04 11:44:06 +0000870 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000871 PyErr_SetString(PyExc_OverflowError,
872 "string is too large to make repr");
Guido van Rossume6a6f392007-11-07 01:19:49 +0000873 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000874 }
875 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000876 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000877 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878 }
879 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000880 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881 register char c;
882 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000883 int quote;
884
Thomas Wouters7e474022000-07-16 12:04:32 +0000885 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000886 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000887 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000888 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000889 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000890 quote = '"';
891
Tim Peters9161c8b2001-12-03 01:55:38 +0000892 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000893 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000894 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000895 /* There's at least enough room for a hex escape
896 and a closing quote. */
897 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000898 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000899 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000901 else if (c == '\t')
902 *p++ = '\\', *p++ = 't';
903 else if (c == '\n')
904 *p++ = '\\', *p++ = 'n';
905 else if (c == '\r')
906 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000907 else if (c < ' ' || c >= 0x7f) {
908 /* For performance, we don't want to call
909 PyOS_snprintf here (extra layers of
910 function call). */
911 sprintf(p, "\\x%02x", c & 0xff);
912 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000913 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000914 else
915 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000917 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000918 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000919 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000920 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000921 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000922 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000924}
925
Guido van Rossum189f1df2001-05-01 16:51:53 +0000926static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000927string_repr(PyObject *op)
928{
929 return PyString_Repr(op, 1);
930}
931
932static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000933string_str(PyObject *s)
934{
Tim Petersc9933152001-10-16 20:18:24 +0000935 assert(PyString_Check(s));
936 if (PyString_CheckExact(s)) {
937 Py_INCREF(s);
938 return s;
939 }
940 else {
941 /* Subtype -- return genuine string with the same value. */
942 PyStringObject *t = (PyStringObject *) s;
943 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
944 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000945}
946
Martin v. Löwis18e16552006-02-15 17:27:45 +0000947static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000948string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949{
950 return a->ob_size;
951}
952
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000953static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000954string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000955{
Andrew Dalke598710c2006-05-25 18:18:39 +0000956 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 register PyStringObject *op;
958 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000959#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000960 if (PyUnicode_Check(bb))
961 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000962#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000963 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000964 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000965 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966 return NULL;
967 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000968#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000969 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000970 if ((a->ob_size == 0 || b->ob_size == 0) &&
971 PyString_CheckExact(a) && PyString_CheckExact(b)) {
972 if (a->ob_size == 0) {
973 Py_INCREF(bb);
974 return bb;
975 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976 Py_INCREF(a);
977 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978 }
Neal Norwitz4f3be8a2008-07-31 17:08:14 +0000979 /* Check that string sizes are not negative, to prevent an
980 overflow in cases where we are passed incorrectly-created
981 strings with negative lengths (due to a bug in other code).
982 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000983 size = a->ob_size + b->ob_size;
Neal Norwitz4f3be8a2008-07-31 17:08:14 +0000984 if (a->ob_size < 0 || b->ob_size < 0 ||
985 a->ob_size > PY_SSIZE_T_MAX - b->ob_size) {
Andrew Dalke598710c2006-05-25 18:18:39 +0000986 PyErr_SetString(PyExc_OverflowError,
987 "strings are too large to concat");
988 return NULL;
989 }
990
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000991 /* Inline PyObject_NewVar */
Neal Norwitz4f3be8a2008-07-31 17:08:14 +0000992 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
993 PyErr_SetString(PyExc_OverflowError,
994 "strings are too large to concat");
995 return NULL;
996 }
Tim Peterse7c05322004-06-27 17:24:49 +0000997 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000998 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001000 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001001 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001002 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001003 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
1004 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001005 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007#undef b
1008}
1009
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001010static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001011string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001012{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001013 register Py_ssize_t i;
1014 register Py_ssize_t j;
1015 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001016 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001017 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001018 if (n < 0)
1019 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001020 /* watch out for overflows: the size can overflow int,
1021 * and the # of bytes needed can overflow size_t
1022 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001023 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001024 if (n && size / n != a->ob_size) {
1025 PyErr_SetString(PyExc_OverflowError,
1026 "repeated string is too long");
1027 return NULL;
1028 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001029 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001030 Py_INCREF(a);
1031 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001032 }
Tim Peterse7c05322004-06-27 17:24:49 +00001033 nbytes = (size_t)size;
1034 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001035 PyErr_SetString(PyExc_OverflowError,
1036 "repeated string is too long");
1037 return NULL;
1038 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001039 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001040 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001041 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001042 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001043 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001044 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001045 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001046 op->ob_sval[size] = '\0';
1047 if (a->ob_size == 1 && n > 0) {
1048 memset(op->ob_sval, a->ob_sval[0] , n);
1049 return (PyObject *) op;
1050 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001051 i = 0;
1052 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001053 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001054 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001055 }
1056 while (i < size) {
1057 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001058 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001059 i += j;
1060 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001061 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001062}
1063
1064/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1065
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001066static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001067string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001068 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001069 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070{
1071 if (i < 0)
1072 i = 0;
1073 if (j < 0)
1074 j = 0; /* Avoid signed/unsigned bug in next line */
1075 if (j > a->ob_size)
1076 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001077 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1078 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001079 Py_INCREF(a);
1080 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001081 }
1082 if (j < i)
1083 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001084 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001085}
1086
Guido van Rossum9284a572000-03-07 15:53:43 +00001087static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001088string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001089{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001090 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001091#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001092 if (PyUnicode_Check(sub_obj))
1093 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001094#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001095 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001096 PyErr_SetString(PyExc_TypeError,
1097 "'in <string>' requires string as left operand");
1098 return -1;
1099 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001100 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001101
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001102 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001103}
1104
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001105static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001106string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001107{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001108 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001109 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001110 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001111 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001112 return NULL;
1113 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001114 pchar = a->ob_sval[i];
1115 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001116 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001117 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001118 else {
1119#ifdef COUNT_ALLOCS
1120 one_strings++;
1121#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001122 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001123 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001124 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001125}
1126
Martin v. Löwiscd353062001-05-24 16:56:35 +00001127static PyObject*
1128string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001129{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001130 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001131 Py_ssize_t len_a, len_b;
1132 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001133 PyObject *result;
1134
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001135 /* Make sure both arguments are strings. */
1136 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001137 result = Py_NotImplemented;
1138 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001139 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001140 if (a == b) {
1141 switch (op) {
1142 case Py_EQ:case Py_LE:case Py_GE:
1143 result = Py_True;
1144 goto out;
1145 case Py_NE:case Py_LT:case Py_GT:
1146 result = Py_False;
1147 goto out;
1148 }
1149 }
1150 if (op == Py_EQ) {
1151 /* Supporting Py_NE here as well does not save
1152 much time, since Py_NE is rarely used. */
1153 if (a->ob_size == b->ob_size
1154 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001155 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001156 a->ob_size) == 0)) {
1157 result = Py_True;
1158 } else {
1159 result = Py_False;
1160 }
1161 goto out;
1162 }
1163 len_a = a->ob_size; len_b = b->ob_size;
1164 min_len = (len_a < len_b) ? len_a : len_b;
1165 if (min_len > 0) {
1166 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1167 if (c==0)
1168 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1169 }else
1170 c = 0;
1171 if (c == 0)
1172 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1173 switch (op) {
1174 case Py_LT: c = c < 0; break;
1175 case Py_LE: c = c <= 0; break;
1176 case Py_EQ: assert(0); break; /* unreachable */
1177 case Py_NE: c = c != 0; break;
1178 case Py_GT: c = c > 0; break;
1179 case Py_GE: c = c >= 0; break;
1180 default:
1181 result = Py_NotImplemented;
1182 goto out;
1183 }
1184 result = c ? Py_True : Py_False;
1185 out:
1186 Py_INCREF(result);
1187 return result;
1188}
1189
1190int
1191_PyString_Eq(PyObject *o1, PyObject *o2)
1192{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001193 PyStringObject *a = (PyStringObject*) o1;
1194 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001195 return a->ob_size == b->ob_size
1196 && *a->ob_sval == *b->ob_sval
1197 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001198}
1199
Guido van Rossum9bfef441993-03-29 10:43:31 +00001200static long
Fred Drakeba096332000-07-09 07:04:36 +00001201string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001202{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001203 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001204 register unsigned char *p;
1205 register long x;
1206
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001207 if (a->ob_shash != -1)
1208 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001209 len = a->ob_size;
1210 p = (unsigned char *) a->ob_sval;
1211 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001212 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001213 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001214 x ^= a->ob_size;
1215 if (x == -1)
1216 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001217 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001218 return x;
1219}
1220
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001221static PyObject*
1222string_subscript(PyStringObject* self, PyObject* item)
1223{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001224 if (PyIndex_Check(item)) {
1225 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001226 if (i == -1 && PyErr_Occurred())
1227 return NULL;
1228 if (i < 0)
1229 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001230 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001231 }
1232 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001233 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001234 char* source_buf;
1235 char* result_buf;
1236 PyObject* result;
1237
Tim Petersae1d0c92006-03-17 03:29:34 +00001238 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001239 PyString_GET_SIZE(self),
1240 &start, &stop, &step, &slicelength) < 0) {
1241 return NULL;
1242 }
1243
1244 if (slicelength <= 0) {
1245 return PyString_FromStringAndSize("", 0);
1246 }
1247 else {
1248 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001249 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001250 if (result_buf == NULL)
1251 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001252
Tim Petersae1d0c92006-03-17 03:29:34 +00001253 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001254 cur += step, i++) {
1255 result_buf[i] = source_buf[cur];
1256 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001257
1258 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001259 slicelength);
1260 PyMem_Free(result_buf);
1261 return result;
1262 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001263 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001264 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001265 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001266 "string indices must be integers");
1267 return NULL;
1268 }
1269}
1270
Martin v. Löwis18e16552006-02-15 17:27:45 +00001271static Py_ssize_t
1272string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001273{
1274 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001275 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001276 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001277 return -1;
1278 }
1279 *ptr = (void *)self->ob_sval;
1280 return self->ob_size;
1281}
1282
Martin v. Löwis18e16552006-02-15 17:27:45 +00001283static Py_ssize_t
1284string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001285{
Guido van Rossum045e6881997-09-08 18:30:11 +00001286 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001287 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001288 return -1;
1289}
1290
Martin v. Löwis18e16552006-02-15 17:27:45 +00001291static Py_ssize_t
1292string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001293{
1294 if ( lenp )
1295 *lenp = self->ob_size;
1296 return 1;
1297}
1298
Martin v. Löwis18e16552006-02-15 17:27:45 +00001299static Py_ssize_t
1300string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001301{
1302 if ( index != 0 ) {
1303 PyErr_SetString(PyExc_SystemError,
1304 "accessing non-existent string segment");
1305 return -1;
1306 }
1307 *ptr = self->ob_sval;
1308 return self->ob_size;
1309}
1310
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001311static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001312 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001313 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001314 (ssizeargfunc)string_repeat, /*sq_repeat*/
1315 (ssizeargfunc)string_item, /*sq_item*/
1316 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001317 0, /*sq_ass_item*/
1318 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001319 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001320};
1321
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001322static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001323 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001324 (binaryfunc)string_subscript,
1325 0,
1326};
1327
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001328static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001329 (readbufferproc)string_buffer_getreadbuf,
1330 (writebufferproc)string_buffer_getwritebuf,
1331 (segcountproc)string_buffer_getsegcount,
1332 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001333};
1334
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335
1336
/* Strip variants; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, indexed by the strip variant above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name of a strip variant: its format string minus the leading
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001345
Andrew Dalke525eab32006-05-26 14:00:45 +00001346
/* True when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly before memcmp() checks the bytes in between.
   Don't call if length < 2: the memcmp count is length-2.
   Every parameter is parenthesized so that expression arguments expand
   with the intended precedence; arguments are evaluated more than
   once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1352
1353
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for `maxsplit` splits: one more
   than the split count (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The parameter and the whole expansion are parenthesized
   so the macro can be used inside larger expressions and with
   expression arguments. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1366
/* Create the substring data[left:right] and append it to `list`.
   Relies on the expanding function providing locals `str` and `list`
   and an `onError` label.  Expands to several statements, so it must
   not be used as the unbraced body of an if/else or loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but for a preallocated list: while fewer than
   MAX_PREALLOC items have been stored, the new string is placed directly
   into slot `count`; after that it is appended.  Also relies on a local
   `count`, which is incremented. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (((PyListObject *)(list))->ob_size = count)

/* Cursor helpers: advance `i` forward (bounded by `len`) or backward
   (bounded by 0) across whitespace or non-whitespace bytes of `s`.
   `i` must be an lvalue; the arguments are evaluated repeatedly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) &&  isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 &&  isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1403
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001404Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001405split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406{
Andrew Dalke525eab32006-05-26 14:00:45 +00001407 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001408 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001409 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001410
1411 if (list == NULL)
1412 return NULL;
1413
Andrew Dalke02758d62006-05-26 15:21:01 +00001414 i = j = 0;
1415
1416 while (maxsplit-- > 0) {
1417 SKIP_SPACE(s, i, len);
1418 if (i==len) break;
1419 j = i; i++;
1420 SKIP_NONSPACE(s, i, len);
1421 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001423
1424 if (i < len) {
1425 /* Only occurs when maxsplit was reached */
1426 /* Skip any remaining whitespace and copy to end of string */
1427 SKIP_SPACE(s, i, len);
1428 if (i != len)
1429 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001430 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001431 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001433 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001434 Py_DECREF(list);
1435 return NULL;
1436}
1437
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001438Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001439split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001440{
Andrew Dalke525eab32006-05-26 14:00:45 +00001441 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001443 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001444
1445 if (list == NULL)
1446 return NULL;
1447
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001448 i = j = 0;
1449 while ((j < len) && (maxcount-- > 0)) {
1450 for(; j<len; j++) {
1451 /* I found that using memchr makes no difference */
1452 if (s[j] == ch) {
1453 SPLIT_ADD(s, i, j);
1454 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001455 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001456 }
1457 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001458 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001459 if (i <= len) {
1460 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001461 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 return list;
1464
1465 onError:
1466 Py_DECREF(list);
1467 return NULL;
1468}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001470PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471"S.split([sep [,maxsplit]]) -> list of strings\n\
1472\n\
1473Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001474delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001475splits are done. If sep is not specified or is None, any\n\
1476whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001477
1478static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001479string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001480{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001481 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001482 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001483 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001484 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001485#ifdef USE_FAST
1486 Py_ssize_t pos;
1487#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488
Martin v. Löwis9c830762006-04-13 08:37:17 +00001489 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001491 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001492 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001493 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001495 if (PyString_Check(subobj)) {
1496 sub = PyString_AS_STRING(subobj);
1497 n = PyString_GET_SIZE(subobj);
1498 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001499#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001500 else if (PyUnicode_Check(subobj))
1501 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001502#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001503 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1504 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001505
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 if (n == 0) {
1507 PyErr_SetString(PyExc_ValueError, "empty separator");
1508 return NULL;
1509 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001510 else if (n == 1)
1511 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001512
Andrew Dalke525eab32006-05-26 14:00:45 +00001513 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514 if (list == NULL)
1515 return NULL;
1516
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001517#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001518 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001519 while (maxsplit-- > 0) {
1520 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1521 if (pos < 0)
1522 break;
1523 j = i+pos;
1524 SPLIT_ADD(s, i, j);
1525 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001526 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001527#else
1528 i = j = 0;
1529 while ((j+n <= len) && (maxsplit-- > 0)) {
1530 for (; j+n <= len; j++) {
1531 if (Py_STRING_MATCH(s, j, sub, n)) {
1532 SPLIT_ADD(s, i, j);
1533 i = j = j + n;
1534 break;
1535 }
1536 }
1537 }
1538#endif
1539 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001540 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001541 return list;
1542
Andrew Dalke525eab32006-05-26 14:00:45 +00001543 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001544 Py_DECREF(list);
1545 return NULL;
1546}
1547
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001548PyDoc_STRVAR(partition__doc__,
1549"S.partition(sep) -> (head, sep, tail)\n\
1550\n\
1551Searches for the separator sep in S, and returns the part before it,\n\
1552the separator itself, and the part after it. If the separator is not\n\
1553found, returns S and two empty strings.");
1554
1555static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001556string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001557{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001558 const char *sep;
1559 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001560
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001561 if (PyString_Check(sep_obj)) {
1562 sep = PyString_AS_STRING(sep_obj);
1563 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001564 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001565#ifdef Py_USING_UNICODE
1566 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001567 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001568#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001569 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001570 return NULL;
1571
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001572 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001573 (PyObject*) self,
1574 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1575 sep_obj, sep, sep_len
1576 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001577}
1578
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001579PyDoc_STRVAR(rpartition__doc__,
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001580"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001581\n\
1582Searches for the separator sep in S, starting at the end of S, and returns\n\
1583the part before it, the separator itself, and the part after it. If the\n\
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001584separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001585
1586static PyObject *
1587string_rpartition(PyStringObject *self, PyObject *sep_obj)
1588{
1589 const char *sep;
1590 Py_ssize_t sep_len;
1591
1592 if (PyString_Check(sep_obj)) {
1593 sep = PyString_AS_STRING(sep_obj);
1594 sep_len = PyString_GET_SIZE(sep_obj);
1595 }
1596#ifdef Py_USING_UNICODE
1597 else if (PyUnicode_Check(sep_obj))
Amaury Forgeot d'Arcafa0d582008-09-01 20:05:08 +00001598 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001599#endif
1600 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1601 return NULL;
1602
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001603 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001604 (PyObject*) self,
1605 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1606 sep_obj, sep, sep_len
1607 );
1608}
1609
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001610Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001611rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001612{
Andrew Dalke525eab32006-05-26 14:00:45 +00001613 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001614 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001615 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001616
1617 if (list == NULL)
1618 return NULL;
1619
Andrew Dalke02758d62006-05-26 15:21:01 +00001620 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001621
Andrew Dalke02758d62006-05-26 15:21:01 +00001622 while (maxsplit-- > 0) {
1623 RSKIP_SPACE(s, i);
1624 if (i<0) break;
1625 j = i; i--;
1626 RSKIP_NONSPACE(s, i);
1627 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001628 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001629 if (i >= 0) {
1630 /* Only occurs when maxsplit was reached */
1631 /* Skip any remaining whitespace and copy to beginning of string */
1632 RSKIP_SPACE(s, i);
1633 if (i >= 0)
1634 SPLIT_ADD(s, 0, i + 1);
1635
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001636 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001637 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001638 if (PyList_Reverse(list) < 0)
1639 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001640 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001641 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001642 Py_DECREF(list);
1643 return NULL;
1644}
1645
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001646Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001647rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001648{
Andrew Dalke525eab32006-05-26 14:00:45 +00001649 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001650 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001651 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001652
1653 if (list == NULL)
1654 return NULL;
1655
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001656 i = j = len - 1;
1657 while ((i >= 0) && (maxcount-- > 0)) {
1658 for (; i >= 0; i--) {
1659 if (s[i] == ch) {
1660 SPLIT_ADD(s, i + 1, j + 1);
1661 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001662 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001663 }
1664 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001665 }
1666 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001667 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001668 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001669 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001670 if (PyList_Reverse(list) < 0)
1671 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001672 return list;
1673
1674 onError:
1675 Py_DECREF(list);
1676 return NULL;
1677}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678
1679PyDoc_STRVAR(rsplit__doc__,
1680"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1681\n\
1682Return a list of the words in the string S, using sep as the\n\
1683delimiter string, starting at the end of the string and working\n\
1684to the front. If maxsplit is given, at most maxsplit splits are\n\
1685done. If sep is not specified or is None, any whitespace string\n\
1686is a separator.");
1687
1688static PyObject *
1689string_rsplit(PyStringObject *self, PyObject *args)
1690{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001691 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001692 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001693 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001694 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001695
Martin v. Löwis9c830762006-04-13 08:37:17 +00001696 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001697 return NULL;
1698 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001699 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700 if (subobj == Py_None)
1701 return rsplit_whitespace(s, len, maxsplit);
1702 if (PyString_Check(subobj)) {
1703 sub = PyString_AS_STRING(subobj);
1704 n = PyString_GET_SIZE(subobj);
1705 }
1706#ifdef Py_USING_UNICODE
1707 else if (PyUnicode_Check(subobj))
1708 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1709#endif
1710 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1711 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001712
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001713 if (n == 0) {
1714 PyErr_SetString(PyExc_ValueError, "empty separator");
1715 return NULL;
1716 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001717 else if (n == 1)
1718 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001719
Andrew Dalke525eab32006-05-26 14:00:45 +00001720 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001721 if (list == NULL)
1722 return NULL;
1723
1724 j = len;
1725 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001726
1727 while ( (i >= 0) && (maxsplit-- > 0) ) {
1728 for (; i>=0; i--) {
1729 if (Py_STRING_MATCH(s, i, sub, n)) {
1730 SPLIT_ADD(s, i + n, j);
1731 j = i;
1732 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001733 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001734 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001735 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001736 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001737 SPLIT_ADD(s, 0, j);
1738 FIX_PREALLOC_SIZE(list);
1739 if (PyList_Reverse(list) < 0)
1740 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001741 return list;
1742
Andrew Dalke525eab32006-05-26 14:00:45 +00001743onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001744 Py_DECREF(list);
1745 return NULL;
1746}
1747
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001749PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750"S.join(sequence) -> string\n\
1751\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001753sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754
1755static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001756string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757{
1758 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001759 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001762 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001763 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001764 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001765 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766
Tim Peters19fe14e2001-01-19 03:03:47 +00001767 seq = PySequence_Fast(orig, "");
1768 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001769 return NULL;
1770 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001771
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001772 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001773 if (seqlen == 0) {
1774 Py_DECREF(seq);
1775 return PyString_FromString("");
1776 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001779 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1780 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001781 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001782 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001783 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001785
Raymond Hettinger674f2412004-08-23 23:23:54 +00001786 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001787 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001788 * Do a pre-pass to figure out the total amount of space we'll
1789 * need (sz), see whether any argument is absurd, and defer to
1790 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001791 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001792 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001793 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001794 item = PySequence_Fast_GET_ITEM(seq, i);
1795 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001796#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001797 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001798 /* Defer to Unicode join.
1799 * CAUTION: There's no gurantee that the
1800 * original sequence can be iterated over
1801 * again, so we must pass seq here.
1802 */
1803 PyObject *result;
1804 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001805 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001806 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001807 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001808#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001809 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001810 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001811 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001812 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001813 Py_DECREF(seq);
1814 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001815 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001816 sz += PyString_GET_SIZE(item);
1817 if (i != 0)
1818 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001819 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001820 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001821 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001822 Py_DECREF(seq);
1823 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001825 }
1826
1827 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001828 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001829 if (res == NULL) {
1830 Py_DECREF(seq);
1831 return NULL;
1832 }
1833
1834 /* Catenate everything. */
1835 p = PyString_AS_STRING(res);
1836 for (i = 0; i < seqlen; ++i) {
1837 size_t n;
1838 item = PySequence_Fast_GET_ITEM(seq, i);
1839 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001840 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001841 p += n;
1842 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001843 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001844 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001845 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001847
Jeremy Hylton49048292000-07-11 03:28:17 +00001848 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850}
1851
Tim Peters52e155e2001-06-16 05:42:57 +00001852PyObject *
1853_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001854{
Tim Petersa7259592001-06-16 05:11:17 +00001855 assert(sep != NULL && PyString_Check(sep));
1856 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001857 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001858}
1859
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001860Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001861string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001862{
1863 if (*end > len)
1864 *end = len;
1865 else if (*end < 0)
1866 *end += len;
1867 if (*end < 0)
1868 *end = 0;
1869 if (*start < 0)
1870 *start += len;
1871 if (*start < 0)
1872 *start = 0;
1873}
1874
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001875Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001876string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001878 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001879 const char *sub;
1880 Py_ssize_t sub_len;
1881 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001883 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1884 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885 return -2;
1886 if (PyString_Check(subobj)) {
1887 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001888 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001889 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001890#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001891 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001892 return PyUnicode_Find(
1893 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001894#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001895 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001896 /* XXX - the "expected a character buffer object" is pretty
1897 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898 return -2;
1899
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001900 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001901 return stringlib_find_slice(
1902 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1903 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001904 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001905 return stringlib_rfind_slice(
1906 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1907 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908}
1909
1910
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001911PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912"S.find(sub [,start [,end]]) -> int\n\
1913\n\
1914Return the lowest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001915such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916arguments start and end are interpreted as in slice notation.\n\
1917\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001918Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919
1920static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001921string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001923 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 if (result == -2)
1925 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001926 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927}
1928
1929
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001930PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931"S.index(sub [,start [,end]]) -> int\n\
1932\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001933Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934
1935static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001936string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001938 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939 if (result == -2)
1940 return NULL;
1941 if (result == -1) {
1942 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001943 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 return NULL;
1945 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001946 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947}
1948
1949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001950PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951"S.rfind(sub [,start [,end]]) -> int\n\
1952\n\
1953Return the highest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001954such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955arguments start and end are interpreted as in slice notation.\n\
1956\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001957Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958
1959static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001960string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001962 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963 if (result == -2)
1964 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001965 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966}
1967
1968
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001969PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970"S.rindex(sub [,start [,end]]) -> int\n\
1971\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001972Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973
1974static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001975string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001977 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978 if (result == -2)
1979 return NULL;
1980 if (result == -1) {
1981 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001982 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983 return NULL;
1984 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001985 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986}
1987
1988
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001989Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001990do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1991{
1992 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001993 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001994 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001995 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1996 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001997
1998 i = 0;
1999 if (striptype != RIGHTSTRIP) {
2000 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2001 i++;
2002 }
2003 }
2004
2005 j = len;
2006 if (striptype != LEFTSTRIP) {
2007 do {
2008 j--;
2009 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2010 j++;
2011 }
2012
2013 if (i == 0 && j == len && PyString_CheckExact(self)) {
2014 Py_INCREF(self);
2015 return (PyObject*)self;
2016 }
2017 else
2018 return PyString_FromStringAndSize(s+i, j-i);
2019}
2020
2021
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002022Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002023do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024{
2025 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002026 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028 i = 0;
2029 if (striptype != RIGHTSTRIP) {
2030 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2031 i++;
2032 }
2033 }
2034
2035 j = len;
2036 if (striptype != LEFTSTRIP) {
2037 do {
2038 j--;
2039 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2040 j++;
2041 }
2042
Tim Peters8fa5dd02001-09-12 02:18:30 +00002043 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044 Py_INCREF(self);
2045 return (PyObject*)self;
2046 }
2047 else
2048 return PyString_FromStringAndSize(s+i, j-i);
2049}
2050
2051
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002052Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002053do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2054{
2055 PyObject *sep = NULL;
2056
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002057 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002058 return NULL;
2059
2060 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002061 if (PyString_Check(sep))
2062 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002063#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002064 else if (PyUnicode_Check(sep)) {
2065 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2066 PyObject *res;
2067 if (uniself==NULL)
2068 return NULL;
2069 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2070 striptype, sep);
2071 Py_DECREF(uniself);
2072 return res;
2073 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002074#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002075 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002076#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002077 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002078#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002079 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002080#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002081 STRIPNAME(striptype));
2082 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002083 }
2084
2085 return do_strip(self, striptype);
2086}
2087
2088
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002089PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002090"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091\n\
2092Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002093whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002094If chars is given and not None, remove characters in chars instead.\n\
2095If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096
2097static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002098string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002100 if (PyTuple_GET_SIZE(args) == 0)
2101 return do_strip(self, BOTHSTRIP); /* Common case */
2102 else
2103 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104}
2105
2106
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002107PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002108"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002110Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002111If chars is given and not None, remove characters in chars instead.\n\
2112If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113
2114static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002115string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002117 if (PyTuple_GET_SIZE(args) == 0)
2118 return do_strip(self, LEFTSTRIP); /* Common case */
2119 else
2120 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121}
2122
2123
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002124PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002125"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002127Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002128If chars is given and not None, remove characters in chars instead.\n\
2129If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130
2131static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002132string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002134 if (PyTuple_GET_SIZE(args) == 0)
2135 return do_strip(self, RIGHTSTRIP); /* Common case */
2136 else
2137 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138}
2139
2140
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002141PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142"S.lower() -> string\n\
2143\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002144Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002146/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2147#ifndef _tolower
2148#define _tolower tolower
2149#endif
2150
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002152string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002154 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002155 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002156 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002158 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002159 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002161
2162 s = PyString_AS_STRING(newobj);
2163
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002164 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002165
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002167 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002168 if (isupper(c))
2169 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002171
Anthony Baxtera6286212006-04-11 07:42:36 +00002172 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173}
2174
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002175PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176"S.upper() -> string\n\
2177\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002178Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002180#ifndef _toupper
2181#define _toupper toupper
2182#endif
2183
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002185string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002187 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002188 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002189 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002191 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002192 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002194
2195 s = PyString_AS_STRING(newobj);
2196
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002197 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002198
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002200 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002201 if (islower(c))
2202 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002204
Anthony Baxtera6286212006-04-11 07:42:36 +00002205 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002206}
2207
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002208PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002209"S.title() -> string\n\
2210\n\
2211Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002212characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002213
2214static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002215string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002216{
2217 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002218 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002219 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002220 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002221
Anthony Baxtera6286212006-04-11 07:42:36 +00002222 newobj = PyString_FromStringAndSize(NULL, n);
2223 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002224 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002225 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226 for (i = 0; i < n; i++) {
2227 int c = Py_CHARMASK(*s++);
2228 if (islower(c)) {
2229 if (!previous_is_cased)
2230 c = toupper(c);
2231 previous_is_cased = 1;
2232 } else if (isupper(c)) {
2233 if (previous_is_cased)
2234 c = tolower(c);
2235 previous_is_cased = 1;
2236 } else
2237 previous_is_cased = 0;
2238 *s_new++ = c;
2239 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002240 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241}
2242
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002243PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244"S.capitalize() -> string\n\
2245\n\
2246Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002247capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248
2249static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002250string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251{
2252 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002253 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002254 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255
Anthony Baxtera6286212006-04-11 07:42:36 +00002256 newobj = PyString_FromStringAndSize(NULL, n);
2257 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002259 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260 if (0 < n) {
2261 int c = Py_CHARMASK(*s++);
2262 if (islower(c))
2263 *s_new = toupper(c);
2264 else
2265 *s_new = c;
2266 s_new++;
2267 }
2268 for (i = 1; i < n; i++) {
2269 int c = Py_CHARMASK(*s++);
2270 if (isupper(c))
2271 *s_new = tolower(c);
2272 else
2273 *s_new = c;
2274 s_new++;
2275 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002276 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277}
2278
2279
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002280PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281"S.count(sub[, start[, end]]) -> int\n\
2282\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002283Return the number of non-overlapping occurrences of substring sub in\n\
2284string S[start:end]. Optional arguments start and end are interpreted\n\
2285as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286
2287static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002288string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002289{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002290 PyObject *sub_obj;
2291 const char *str = PyString_AS_STRING(self), *sub;
2292 Py_ssize_t sub_len;
2293 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002295 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2296 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002297 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002298
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002299 if (PyString_Check(sub_obj)) {
2300 sub = PyString_AS_STRING(sub_obj);
2301 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002303#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002304 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002305 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002306 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002307 if (count == -1)
2308 return NULL;
2309 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002310 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002311 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002312#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002313 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002314 return NULL;
2315
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002316 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002317
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002318 return PyInt_FromSsize_t(
2319 stringlib_count(str + start, end - start, sub, sub_len)
2320 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321}
2322
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002323PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324"S.swapcase() -> string\n\
2325\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002327converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328
2329static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002330string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331{
2332 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002333 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002334 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335
Anthony Baxtera6286212006-04-11 07:42:36 +00002336 newobj = PyString_FromStringAndSize(NULL, n);
2337 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002339 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 for (i = 0; i < n; i++) {
2341 int c = Py_CHARMASK(*s++);
2342 if (islower(c)) {
2343 *s_new = toupper(c);
2344 }
2345 else if (isupper(c)) {
2346 *s_new = tolower(c);
2347 }
2348 else
2349 *s_new = c;
2350 s_new++;
2351 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002352 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353}
2354
2355
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002356PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357"S.translate(table [,deletechars]) -> string\n\
2358\n\
2359Return a copy of the string S, where all characters occurring\n\
2360in the optional argument deletechars are removed, and the\n\
2361remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002362translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002363
2364static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002365string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 register char *input, *output;
2368 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002369 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002372 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373 PyObject *result;
2374 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002375 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002377 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380
2381 if (PyString_Check(tableobj)) {
2382 table1 = PyString_AS_STRING(tableobj);
2383 tablen = PyString_GET_SIZE(tableobj);
2384 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002385#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002386 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002387 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 parameter; instead a mapping to None will cause characters
2389 to be deleted. */
2390 if (delobj != NULL) {
2391 PyErr_SetString(PyExc_TypeError,
2392 "deletions are implemented differently for unicode");
2393 return NULL;
2394 }
2395 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002397#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002399 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400
Martin v. Löwis00b61272002-12-12 20:03:19 +00002401 if (tablen != 256) {
2402 PyErr_SetString(PyExc_ValueError,
2403 "translation table must be 256 characters long");
2404 return NULL;
2405 }
2406
Guido van Rossum4c08d552000-03-10 22:55:18 +00002407 if (delobj != NULL) {
2408 if (PyString_Check(delobj)) {
2409 del_table = PyString_AS_STRING(delobj);
2410 dellen = PyString_GET_SIZE(delobj);
2411 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002412#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002413 else if (PyUnicode_Check(delobj)) {
2414 PyErr_SetString(PyExc_TypeError,
2415 "deletions are implemented differently for unicode");
2416 return NULL;
2417 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002418#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002419 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2420 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002421 }
2422 else {
2423 del_table = NULL;
2424 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425 }
2426
2427 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002428 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 result = PyString_FromStringAndSize((char *)NULL, inlen);
2430 if (result == NULL)
2431 return NULL;
2432 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002433 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434
2435 if (dellen == 0) {
2436 /* If no deletions are required, use faster code */
2437 for (i = inlen; --i >= 0; ) {
2438 c = Py_CHARMASK(*input++);
2439 if (Py_CHARMASK((*output++ = table[c])) != c)
2440 changed = 1;
2441 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002442 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443 return result;
2444 Py_DECREF(result);
2445 Py_INCREF(input_obj);
2446 return input_obj;
2447 }
2448
2449 for (i = 0; i < 256; i++)
2450 trans_table[i] = Py_CHARMASK(table[i]);
2451
2452 for (i = 0; i < dellen; i++)
2453 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2454
2455 for (i = inlen; --i >= 0; ) {
2456 c = Py_CHARMASK(*input++);
2457 if (trans_table[c] != -1)
2458 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2459 continue;
2460 changed = 1;
2461 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002462 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463 Py_DECREF(result);
2464 Py_INCREF(input_obj);
2465 return input_obj;
2466 }
2467 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002468 if (inlen > 0)
2469 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002470 return result;
2471}
2472
2473
/* Search direction selectors passed as the `direction` argument of
   findstring()/countstring().  REVERSE is parenthesized so that the
   negative constant stays a single operand wherever it is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to c within the first
   target_len bytes of target, or NULL if there is none (memchr()
   semantics).  Every argument is parenthesized in the expansion so that
   arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2481
2482/* String ops must return a string. */
2483/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002484Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002485return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002486{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002487 if (PyString_CheckExact(self)) {
2488 Py_INCREF(self);
2489 return self;
2490 }
2491 return (PyStringObject *)PyString_FromStringAndSize(
2492 PyString_AS_STRING(self),
2493 PyString_GET_SIZE(self));
2494}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002495
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002496Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002497countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002498{
2499 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002500 const char *start=target;
2501 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002502
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002503 while ( (start=findchar(start, end-start, c)) != NULL ) {
2504 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002505 if (count >= maxcount)
2506 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002507 start += 1;
2508 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002509 return count;
2510}
2511
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002512Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002513findstring(const char *target, Py_ssize_t target_len,
2514 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002515 Py_ssize_t start,
2516 Py_ssize_t end,
2517 int direction)
2518{
2519 if (start < 0) {
2520 start += target_len;
2521 if (start < 0)
2522 start = 0;
2523 }
2524 if (end > target_len) {
2525 end = target_len;
2526 } else if (end < 0) {
2527 end += target_len;
2528 if (end < 0)
2529 end = 0;
2530 }
2531
2532 /* zero-length substrings always match at the first attempt */
2533 if (pattern_len == 0)
2534 return (direction > 0) ? start : end;
2535
2536 end -= pattern_len;
2537
2538 if (direction < 0) {
2539 for (; end >= start; end--)
2540 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2541 return end;
2542 } else {
2543 for (; start <= end; start++)
2544 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2545 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002546 }
2547 return -1;
2548}
2549
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002550Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002551countstring(const char *target, Py_ssize_t target_len,
2552 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002553 Py_ssize_t start,
2554 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002555 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002556{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002557 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002558
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002559 if (start < 0) {
2560 start += target_len;
2561 if (start < 0)
2562 start = 0;
2563 }
2564 if (end > target_len) {
2565 end = target_len;
2566 } else if (end < 0) {
2567 end += target_len;
2568 if (end < 0)
2569 end = 0;
2570 }
2571
2572 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002573 if (pattern_len == 0 || maxcount == 0) {
2574 if (target_len+1 < maxcount)
2575 return target_len+1;
2576 return maxcount;
2577 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002578
2579 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002580 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002581 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002582 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2583 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002584 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002585 end -= pattern_len-1;
2586 }
2587 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002588 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002589 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2590 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002591 if (--maxcount <= 0)
2592 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002593 start += pattern_len-1;
2594 }
2595 }
2596 return count;
2597}
2598
2599
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002600/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002601
2602/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002603Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002604replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002605 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002606 Py_ssize_t maxcount)
2607{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002608 char *self_s, *result_s;
2609 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002610 Py_ssize_t count, i, product;
2611 PyStringObject *result;
2612
2613 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002614
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002615 /* 1 at the end plus 1 after every character */
2616 count = self_len+1;
2617 if (maxcount < count)
2618 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002619
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002620 /* Check for overflow */
2621 /* result_len = count * to_len + self_len; */
2622 product = count * to_len;
2623 if (product / to_len != count) {
2624 PyErr_SetString(PyExc_OverflowError,
2625 "replace string is too long");
2626 return NULL;
2627 }
2628 result_len = product + self_len;
2629 if (result_len < 0) {
2630 PyErr_SetString(PyExc_OverflowError,
2631 "replace string is too long");
2632 return NULL;
2633 }
2634
2635 if (! (result = (PyStringObject *)
2636 PyString_FromStringAndSize(NULL, result_len)) )
2637 return NULL;
2638
2639 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002640 result_s = PyString_AS_STRING(result);
2641
2642 /* TODO: special case single character, which doesn't need memcpy */
2643
2644 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002645 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002646 result_s += to_len;
2647 count -= 1;
2648
2649 for (i=0; i<count; i++) {
2650 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002651 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002652 result_s += to_len;
2653 }
2654
2655 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002656 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002657
2658 return result;
2659}
2660
2661/* Special case for deleting a single character */
2662/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002663Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002664replace_delete_single_character(PyStringObject *self,
2665 char from_c, Py_ssize_t maxcount)
2666{
2667 char *self_s, *result_s;
2668 char *start, *next, *end;
2669 Py_ssize_t self_len, result_len;
2670 Py_ssize_t count;
2671 PyStringObject *result;
2672
2673 self_len = PyString_GET_SIZE(self);
2674 self_s = PyString_AS_STRING(self);
2675
Andrew Dalke51324072006-05-26 20:25:22 +00002676 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002677 if (count == 0) {
2678 return return_self(self);
2679 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002680
2681 result_len = self_len - count; /* from_len == 1 */
2682 assert(result_len>=0);
2683
2684 if ( (result = (PyStringObject *)
2685 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2686 return NULL;
2687 result_s = PyString_AS_STRING(result);
2688
2689 start = self_s;
2690 end = self_s + self_len;
2691 while (count-- > 0) {
2692 next = findchar(start, end-start, from_c);
2693 if (next == NULL)
2694 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002695 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002696 result_s += (next-start);
2697 start = next+1;
2698 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002699 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002700
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002701 return result;
2702}
2703
2704/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2705
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002706Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002707replace_delete_substring(PyStringObject *self,
2708 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002709 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002710 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002711 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002712 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002713 Py_ssize_t count, offset;
2714 PyStringObject *result;
2715
2716 self_len = PyString_GET_SIZE(self);
2717 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002718
2719 count = countstring(self_s, self_len,
2720 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002721 0, self_len, 1,
2722 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002723
2724 if (count == 0) {
2725 /* no matches */
2726 return return_self(self);
2727 }
2728
2729 result_len = self_len - (count * from_len);
2730 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002731
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002732 if ( (result = (PyStringObject *)
2733 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2734 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002735
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002736 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002737
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002738 start = self_s;
2739 end = self_s + self_len;
2740 while (count-- > 0) {
2741 offset = findstring(start, end-start,
2742 from_s, from_len,
2743 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002744 if (offset == -1)
2745 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002746 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002747
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002748 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002749
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002750 result_s += (next-start);
2751 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002752 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002753 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002754 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002755}
2756
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002757/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002758Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002759replace_single_character_in_place(PyStringObject *self,
2760 char from_c, char to_c,
2761 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002762{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002763 char *self_s, *result_s, *start, *end, *next;
2764 Py_ssize_t self_len;
2765 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002766
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002767 /* The result string will be the same size */
2768 self_s = PyString_AS_STRING(self);
2769 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002770
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002772
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002773 if (next == NULL) {
2774 /* No matches; return the original string */
2775 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002776 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002777
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002778 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002779 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002780 if (result == NULL)
2781 return NULL;
2782 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002783 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002784
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002785 /* change everything in-place, starting with this one */
2786 start = result_s + (next-self_s);
2787 *start = to_c;
2788 start++;
2789 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002790
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002791 while (--maxcount > 0) {
2792 next = findchar(start, end-start, from_c);
2793 if (next == NULL)
2794 break;
2795 *next = to_c;
2796 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002797 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002798
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002799 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002800}
2801
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002802/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002803Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002804replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002805 const char *from_s, Py_ssize_t from_len,
2806 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002807 Py_ssize_t maxcount)
2808{
2809 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002810 char *self_s;
2811 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002812 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002813
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002815
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002816 self_s = PyString_AS_STRING(self);
2817 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002818
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819 offset = findstring(self_s, self_len,
2820 from_s, from_len,
2821 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002822 if (offset == -1) {
2823 /* No matches; return the original string */
2824 return return_self(self);
2825 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002826
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002828 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002829 if (result == NULL)
2830 return NULL;
2831 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002832 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002833
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002834 /* change everything in-place, starting with this one */
2835 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002836 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002837 start += from_len;
2838 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002839
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002840 while ( --maxcount > 0) {
2841 offset = findstring(start, end-start,
2842 from_s, from_len,
2843 0, end-start, FORWARD);
2844 if (offset==-1)
2845 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002846 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002847 start += offset+from_len;
2848 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002849
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002850 return result;
2851}
2852
2853/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002854Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002855replace_single_character(PyStringObject *self,
2856 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002857 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002858 Py_ssize_t maxcount)
2859{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002860 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002861 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002862 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002863 Py_ssize_t count, product;
2864 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002865
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002866 self_s = PyString_AS_STRING(self);
2867 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002868
Andrew Dalke51324072006-05-26 20:25:22 +00002869 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002870 if (count == 0) {
2871 /* no matches, return unchanged */
2872 return return_self(self);
2873 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002874
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002875 /* use the difference between current and new, hence the "-1" */
2876 /* result_len = self_len + count * (to_len-1) */
2877 product = count * (to_len-1);
2878 if (product / (to_len-1) != count) {
2879 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2880 return NULL;
2881 }
2882 result_len = self_len + product;
2883 if (result_len < 0) {
2884 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2885 return NULL;
2886 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002887
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002888 if ( (result = (PyStringObject *)
2889 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2890 return NULL;
2891 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002892
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002893 start = self_s;
2894 end = self_s + self_len;
2895 while (count-- > 0) {
2896 next = findchar(start, end-start, from_c);
2897 if (next == NULL)
2898 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002899
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002900 if (next == start) {
2901 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002902 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002903 result_s += to_len;
2904 start += 1;
2905 } else {
2906 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002907 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002909 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002910 result_s += to_len;
2911 start = next+1;
2912 }
2913 }
2914 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002915 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002916
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002917 return result;
2918}
2919
2920/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002921Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002922replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002923 const char *from_s, Py_ssize_t from_len,
2924 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002925 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002926 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002927 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002928 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002929 Py_ssize_t count, offset, product;
2930 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002931
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002932 self_s = PyString_AS_STRING(self);
2933 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002934
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002935 count = countstring(self_s, self_len,
2936 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002937 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002938 if (count == 0) {
2939 /* no matches, return unchanged */
2940 return return_self(self);
2941 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002942
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002943 /* Check for overflow */
2944 /* result_len = self_len + count * (to_len-from_len) */
2945 product = count * (to_len-from_len);
2946 if (product / (to_len-from_len) != count) {
2947 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2948 return NULL;
2949 }
2950 result_len = self_len + product;
2951 if (result_len < 0) {
2952 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2953 return NULL;
2954 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002955
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002956 if ( (result = (PyStringObject *)
2957 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2958 return NULL;
2959 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002960
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002961 start = self_s;
2962 end = self_s + self_len;
2963 while (count-- > 0) {
2964 offset = findstring(start, end-start,
2965 from_s, from_len,
2966 0, end-start, FORWARD);
2967 if (offset == -1)
2968 break;
2969 next = start+offset;
2970 if (next == start) {
2971 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002972 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002973 result_s += to_len;
2974 start += from_len;
2975 } else {
2976 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002977 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002979 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002980 result_s += to_len;
2981 start = next+from_len;
2982 }
2983 }
2984 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002985 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002986
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002987 return result;
2988}
2989
2990
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002991Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002992replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002993 const char *from_s, Py_ssize_t from_len,
2994 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002995 Py_ssize_t maxcount)
2996{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002997 if (maxcount < 0) {
2998 maxcount = PY_SSIZE_T_MAX;
2999 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3000 /* nothing to do; return the original string */
3001 return return_self(self);
3002 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003003
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003004 if (maxcount == 0 ||
3005 (from_len == 0 && to_len == 0)) {
3006 /* nothing to do; return the original string */
3007 return return_self(self);
3008 }
3009
3010 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003011
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003012 if (from_len == 0) {
3013 /* insert the 'to' string everywhere. */
3014 /* >>> "Python".replace("", ".") */
3015 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003016 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003017 }
3018
3019 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3020 /* point for an empty self string to generate a non-empty string */
3021 /* Special case so the remaining code always gets a non-empty string */
3022 if (PyString_GET_SIZE(self) == 0) {
3023 return return_self(self);
3024 }
3025
3026 if (to_len == 0) {
3027 /* delete all occurances of 'from' string */
3028 if (from_len == 1) {
3029 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003030 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003031 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003032 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003033 }
3034 }
3035
3036 /* Handle special case where both strings have the same length */
3037
3038 if (from_len == to_len) {
3039 if (from_len == 1) {
3040 return replace_single_character_in_place(
3041 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003042 from_s[0],
3043 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003044 maxcount);
3045 } else {
3046 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003047 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003048 }
3049 }
3050
3051 /* Otherwise use the more generic algorithms */
3052 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003053 return replace_single_character(self, from_s[0],
3054 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003055 } else {
3056 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003057 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003058 }
3059}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003060
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003061PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003062"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003063\n\
3064Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003065old replaced by new. If the optional argument count is\n\
3066given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003067
3068static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003069string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003070{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003071 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003072 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003073 const char *from_s, *to_s;
3074 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003075
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003076 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003077 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003078
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003079 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003080 from_s = PyString_AS_STRING(from);
3081 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003083#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003084 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003085 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003086 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003087#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003088 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003089 return NULL;
3090
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003091 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003092 to_s = PyString_AS_STRING(to);
3093 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003094 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003095#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003096 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003097 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003098 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003099#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003100 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003101 return NULL;
3102
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003103 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003104 from_s, from_len,
3105 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003106}
3107
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003108/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003109
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003110/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003111 * against substr, using the start and end arguments. Returns
3112 * -1 on error, 0 if not found and 1 if found.
3113 */
3114Py_LOCAL(int)
3115_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3116 Py_ssize_t end, int direction)
3117{
3118 Py_ssize_t len = PyString_GET_SIZE(self);
3119 Py_ssize_t slen;
3120 const char* sub;
3121 const char* str;
3122
3123 if (PyString_Check(substr)) {
3124 sub = PyString_AS_STRING(substr);
3125 slen = PyString_GET_SIZE(substr);
3126 }
3127#ifdef Py_USING_UNICODE
3128 else if (PyUnicode_Check(substr))
3129 return PyUnicode_Tailmatch((PyObject *)self,
3130 substr, start, end, direction);
3131#endif
3132 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3133 return -1;
3134 str = PyString_AS_STRING(self);
3135
3136 string_adjust_indices(&start, &end, len);
3137
3138 if (direction < 0) {
3139 /* startswith */
3140 if (start+slen > len)
3141 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003142 } else {
3143 /* endswith */
3144 if (end-start < slen || start > len)
3145 return 0;
3146
3147 if (end-slen > start)
3148 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003149 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003150 if (end-start >= slen)
3151 return ! memcmp(str+start, sub, slen);
3152 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003153}
3154
3155
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003156PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003157"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003158\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003159Return True if S starts with the specified prefix, False otherwise.\n\
3160With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003161With optional end, stop comparing S at that position.\n\
3162prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003163
3164static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003165string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003166{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003167 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003168 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003170 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003171
Guido van Rossumc6821402000-05-08 14:08:05 +00003172 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3173 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003174 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003175 if (PyTuple_Check(subobj)) {
3176 Py_ssize_t i;
3177 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3178 result = _string_tailmatch(self,
3179 PyTuple_GET_ITEM(subobj, i),
3180 start, end, -1);
3181 if (result == -1)
3182 return NULL;
3183 else if (result) {
3184 Py_RETURN_TRUE;
3185 }
3186 }
3187 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003188 }
Georg Brandl24250812006-06-09 18:45:48 +00003189 result = _string_tailmatch(self, subobj, start, end, -1);
3190 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003191 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003192 else
Georg Brandl24250812006-06-09 18:45:48 +00003193 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003194}
3195
3196
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003197PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003198"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003199\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003200Return True if S ends with the specified suffix, False otherwise.\n\
3201With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003202With optional end, stop comparing S at that position.\n\
3203suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003204
3205static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003206string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003207{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003208 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003209 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003210 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003211 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003212
Guido van Rossumc6821402000-05-08 14:08:05 +00003213 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3214 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003215 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003216 if (PyTuple_Check(subobj)) {
3217 Py_ssize_t i;
3218 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3219 result = _string_tailmatch(self,
3220 PyTuple_GET_ITEM(subobj, i),
3221 start, end, +1);
3222 if (result == -1)
3223 return NULL;
3224 else if (result) {
3225 Py_RETURN_TRUE;
3226 }
3227 }
3228 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003229 }
Georg Brandl24250812006-06-09 18:45:48 +00003230 result = _string_tailmatch(self, subobj, start, end, +1);
3231 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003232 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003233 else
Georg Brandl24250812006-06-09 18:45:48 +00003234 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003235}
3236
3237
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003238PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003239"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003240\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003241Encodes S using the codec registered for encoding. encoding defaults\n\
3242to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003243handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003244a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3245'xmlcharrefreplace' as well as any other name registered with\n\
3246codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003247
3248static PyObject *
3249string_encode(PyStringObject *self, PyObject *args)
3250{
3251 char *encoding = NULL;
3252 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003253 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003254
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003255 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3256 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003257 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003258 if (v == NULL)
3259 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003260 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3261 PyErr_Format(PyExc_TypeError,
3262 "encoder did not return a string/unicode object "
3263 "(type=%.400s)",
3264 v->ob_type->tp_name);
3265 Py_DECREF(v);
3266 return NULL;
3267 }
3268 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003269
3270 onError:
3271 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003272}
3273
3274
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003275PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003276"S.decode([encoding[,errors]]) -> object\n\
3277\n\
3278Decodes S using the codec registered for encoding. encoding defaults\n\
3279to the default encoding. errors may be given to set a different error\n\
3280handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003281a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3282as well as any other name registerd with codecs.register_error that is\n\
3283able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003284
3285static PyObject *
3286string_decode(PyStringObject *self, PyObject *args)
3287{
3288 char *encoding = NULL;
3289 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003290 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003291
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003292 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3293 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003294 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003295 if (v == NULL)
3296 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003297 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3298 PyErr_Format(PyExc_TypeError,
3299 "decoder did not return a string/unicode object "
3300 "(type=%.400s)",
3301 v->ob_type->tp_name);
3302 Py_DECREF(v);
3303 return NULL;
3304 }
3305 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003306
3307 onError:
3308 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003309}
3310
3311
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003312PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003313"S.expandtabs([tabsize]) -> string\n\
3314\n\
3315Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003316If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003317
3318static PyObject*
3319string_expandtabs(PyStringObject *self, PyObject *args)
3320{
Guido van Rossum44a93e52008-03-11 21:14:54 +00003321 const char *e, *p, *qe;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003322 char *q;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003323 Py_ssize_t i, j, incr;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003324 PyObject *u;
3325 int tabsize = 8;
3326
3327 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3328 return NULL;
3329
Thomas Wouters7e474022000-07-16 12:04:32 +00003330 /* First pass: determine size of output string */
Guido van Rossum44a93e52008-03-11 21:14:54 +00003331 i = 0; /* chars up to and including most recent \n or \r */
3332 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3333 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003334 for (p = PyString_AS_STRING(self); p < e; p++)
3335 if (*p == '\t') {
Neal Norwitz66e64e22007-06-09 04:06:30 +00003336 if (tabsize > 0) {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003337 incr = tabsize - (j % tabsize);
3338 if (j > PY_SSIZE_T_MAX - incr)
3339 goto overflow1;
3340 j += incr;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003341 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003342 }
3343 else {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003344 if (j > PY_SSIZE_T_MAX - 1)
3345 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003346 j++;
3347 if (*p == '\n' || *p == '\r') {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003348 if (i > PY_SSIZE_T_MAX - j)
3349 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003350 i += j;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003351 j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003352 }
3353 }
3354
Guido van Rossum44a93e52008-03-11 21:14:54 +00003355 if (i > PY_SSIZE_T_MAX - j)
3356 goto overflow1;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003357
Guido van Rossum4c08d552000-03-10 22:55:18 +00003358 /* Second pass: create output string and fill it */
3359 u = PyString_FromStringAndSize(NULL, i + j);
3360 if (!u)
3361 return NULL;
3362
Guido van Rossum44a93e52008-03-11 21:14:54 +00003363 j = 0; /* same as in first pass */
3364 q = PyString_AS_STRING(u); /* next output char */
3365 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003366
3367 for (p = PyString_AS_STRING(self); p < e; p++)
3368 if (*p == '\t') {
3369 if (tabsize > 0) {
3370 i = tabsize - (j % tabsize);
3371 j += i;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003372 while (i--) {
3373 if (q >= qe)
3374 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003375 *q++ = ' ';
Guido van Rossum44a93e52008-03-11 21:14:54 +00003376 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003377 }
3378 }
3379 else {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003380 if (q >= qe)
3381 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003382 *q++ = *p;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003383 j++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003384 if (*p == '\n' || *p == '\r')
3385 j = 0;
3386 }
3387
3388 return u;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003389
3390 overflow2:
3391 Py_DECREF(u);
3392 overflow1:
3393 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3394 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003395}
3396
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003397Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003398pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003399{
3400 PyObject *u;
3401
3402 if (left < 0)
3403 left = 0;
3404 if (right < 0)
3405 right = 0;
3406
Tim Peters8fa5dd02001-09-12 02:18:30 +00003407 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003408 Py_INCREF(self);
3409 return (PyObject *)self;
3410 }
3411
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003412 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003413 left + PyString_GET_SIZE(self) + right);
3414 if (u) {
3415 if (left)
3416 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003417 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003418 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003419 PyString_GET_SIZE(self));
3420 if (right)
3421 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3422 fill, right);
3423 }
3424
3425 return u;
3426}
3427
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003428PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003429"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003430"\n"
3431"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003432"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003433
3434static PyObject *
3435string_ljust(PyStringObject *self, PyObject *args)
3436{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003437 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003438 char fillchar = ' ';
3439
Thomas Wouters4abb3662006-04-19 14:50:15 +00003440 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003441 return NULL;
3442
Tim Peters8fa5dd02001-09-12 02:18:30 +00003443 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003444 Py_INCREF(self);
3445 return (PyObject*) self;
3446 }
3447
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003448 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003449}
3450
3451
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003452PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003453"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003454"\n"
3455"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003456"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003457
3458static PyObject *
3459string_rjust(PyStringObject *self, PyObject *args)
3460{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003461 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003462 char fillchar = ' ';
3463
Thomas Wouters4abb3662006-04-19 14:50:15 +00003464 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003465 return NULL;
3466
Tim Peters8fa5dd02001-09-12 02:18:30 +00003467 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003468 Py_INCREF(self);
3469 return (PyObject*) self;
3470 }
3471
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003472 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003473}
3474
3475
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003476PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003477"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003478"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003479"Return S centered in a string of length width. Padding is\n"
3480"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003481
3482static PyObject *
3483string_center(PyStringObject *self, PyObject *args)
3484{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003485 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003486 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003487 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003488
Thomas Wouters4abb3662006-04-19 14:50:15 +00003489 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003490 return NULL;
3491
Tim Peters8fa5dd02001-09-12 02:18:30 +00003492 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003493 Py_INCREF(self);
3494 return (PyObject*) self;
3495 }
3496
3497 marg = width - PyString_GET_SIZE(self);
3498 left = marg / 2 + (marg & width & 1);
3499
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003500 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003501}
3502
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003503PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003504"S.zfill(width) -> string\n"
3505"\n"
3506"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003507"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003508
3509static PyObject *
3510string_zfill(PyStringObject *self, PyObject *args)
3511{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003512 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003513 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003514 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003515 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003516
Thomas Wouters4abb3662006-04-19 14:50:15 +00003517 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003518 return NULL;
3519
3520 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003521 if (PyString_CheckExact(self)) {
3522 Py_INCREF(self);
3523 return (PyObject*) self;
3524 }
3525 else
3526 return PyString_FromStringAndSize(
3527 PyString_AS_STRING(self),
3528 PyString_GET_SIZE(self)
3529 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003530 }
3531
3532 fill = width - PyString_GET_SIZE(self);
3533
3534 s = pad(self, fill, 0, '0');
3535
3536 if (s == NULL)
3537 return NULL;
3538
3539 p = PyString_AS_STRING(s);
3540 if (p[fill] == '+' || p[fill] == '-') {
3541 /* move sign to beginning of string */
3542 p[0] = p[fill];
3543 p[fill] = '0';
3544 }
3545
3546 return (PyObject*) s;
3547}
3548
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003549PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003550"S.isspace() -> bool\n\
3551\n\
3552Return True if all characters in S are whitespace\n\
3553and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003554
3555static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003556string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003557{
Fred Drakeba096332000-07-09 07:04:36 +00003558 register const unsigned char *p
3559 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003560 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561
Guido van Rossum4c08d552000-03-10 22:55:18 +00003562 /* Shortcut for single character strings */
3563 if (PyString_GET_SIZE(self) == 1 &&
3564 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003565 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003567 /* Special case for empty strings */
3568 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003569 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003570
Guido van Rossum4c08d552000-03-10 22:55:18 +00003571 e = p + PyString_GET_SIZE(self);
3572 for (; p < e; p++) {
3573 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003574 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003575 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003576 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003577}
3578
3579
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003580PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003582\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003583Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003584and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003585
3586static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003587string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003588{
Fred Drakeba096332000-07-09 07:04:36 +00003589 register const unsigned char *p
3590 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003591 register const unsigned char *e;
3592
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003593 /* Shortcut for single character strings */
3594 if (PyString_GET_SIZE(self) == 1 &&
3595 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003596 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003597
3598 /* Special case for empty strings */
3599 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003600 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003601
3602 e = p + PyString_GET_SIZE(self);
3603 for (; p < e; p++) {
3604 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003605 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003606 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003607 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003608}
3609
3610
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003611PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003613\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003614Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003615and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003616
3617static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003618string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003619{
Fred Drakeba096332000-07-09 07:04:36 +00003620 register const unsigned char *p
3621 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003622 register const unsigned char *e;
3623
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003624 /* Shortcut for single character strings */
3625 if (PyString_GET_SIZE(self) == 1 &&
3626 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003627 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003628
3629 /* Special case for empty strings */
3630 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003631 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003632
3633 e = p + PyString_GET_SIZE(self);
3634 for (; p < e; p++) {
3635 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003636 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003637 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003638 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003639}
3640
3641
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003642PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003643"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003645Return True if all characters in S are digits\n\
3646and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003647
3648static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003649string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650{
Fred Drakeba096332000-07-09 07:04:36 +00003651 register const unsigned char *p
3652 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003653 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655 /* Shortcut for single character strings */
3656 if (PyString_GET_SIZE(self) == 1 &&
3657 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003658 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003660 /* Special case for empty strings */
3661 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003662 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003663
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664 e = p + PyString_GET_SIZE(self);
3665 for (; p < e; p++) {
3666 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003667 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003669 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670}
3671
3672
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003673PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003674"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003675\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003677at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678
3679static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003680string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681{
Fred Drakeba096332000-07-09 07:04:36 +00003682 register const unsigned char *p
3683 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003684 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003685 int cased;
3686
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687 /* Shortcut for single character strings */
3688 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003689 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003690
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003691 /* Special case for empty strings */
3692 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003693 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003694
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695 e = p + PyString_GET_SIZE(self);
3696 cased = 0;
3697 for (; p < e; p++) {
3698 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003699 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700 else if (!cased && islower(*p))
3701 cased = 1;
3702 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003703 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704}
3705
3706
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003707PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003708"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003710Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003711at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712
3713static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003714string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715{
Fred Drakeba096332000-07-09 07:04:36 +00003716 register const unsigned char *p
3717 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003718 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719 int cased;
3720
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721 /* Shortcut for single character strings */
3722 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003723 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003725 /* Special case for empty strings */
3726 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003727 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003728
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729 e = p + PyString_GET_SIZE(self);
3730 cased = 0;
3731 for (; p < e; p++) {
3732 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003733 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734 else if (!cased && isupper(*p))
3735 cased = 1;
3736 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003737 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738}
3739
3740
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003741PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003742"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003744Return True if S is a titlecased string and there is at least one\n\
3745character in S, i.e. uppercase characters may only follow uncased\n\
3746characters and lowercase characters only cased ones. Return False\n\
3747otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748
3749static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003750string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751{
Fred Drakeba096332000-07-09 07:04:36 +00003752 register const unsigned char *p
3753 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003754 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755 int cased, previous_is_cased;
3756
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757 /* Shortcut for single character strings */
3758 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003759 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003761 /* Special case for empty strings */
3762 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003763 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003764
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765 e = p + PyString_GET_SIZE(self);
3766 cased = 0;
3767 previous_is_cased = 0;
3768 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003769 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770
3771 if (isupper(ch)) {
3772 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003773 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774 previous_is_cased = 1;
3775 cased = 1;
3776 }
3777 else if (islower(ch)) {
3778 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003779 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003780 previous_is_cased = 1;
3781 cased = 1;
3782 }
3783 else
3784 previous_is_cased = 0;
3785 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003786 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787}
3788
3789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003790PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003791"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003792\n\
3793Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003794Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003795is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003796
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797static PyObject*
3798string_splitlines(PyStringObject *self, PyObject *args)
3799{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003800 register Py_ssize_t i;
3801 register Py_ssize_t j;
3802 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003803 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804 PyObject *list;
3805 PyObject *str;
3806 char *data;
3807
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003808 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809 return NULL;
3810
3811 data = PyString_AS_STRING(self);
3812 len = PyString_GET_SIZE(self);
3813
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003814 /* This does not use the preallocated list because splitlines is
3815 usually run with hundreds of newlines. The overhead of
3816 switching between PyList_SET_ITEM and append causes about a
3817 2-3% slowdown for that common case. A smarter implementation
3818 could move the if check out, so the SET_ITEMs are done first
3819 and the appends only done when the prealloc buffer is full.
3820 That's too much work for little gain.*/
3821
Guido van Rossum4c08d552000-03-10 22:55:18 +00003822 list = PyList_New(0);
3823 if (!list)
3824 goto onError;
3825
3826 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003827 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003828
Guido van Rossum4c08d552000-03-10 22:55:18 +00003829 /* Find a line and append it */
3830 while (i < len && data[i] != '\n' && data[i] != '\r')
3831 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003832
3833 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003834 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003835 if (i < len) {
3836 if (data[i] == '\r' && i + 1 < len &&
3837 data[i+1] == '\n')
3838 i += 2;
3839 else
3840 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003841 if (keepends)
3842 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003843 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003844 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003845 j = i;
3846 }
3847 if (j < len) {
3848 SPLIT_APPEND(data, j, len);
3849 }
3850
3851 return list;
3852
3853 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003854 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003855 return NULL;
3856}
3857
3858#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003859#undef SPLIT_ADD
3860#undef MAX_PREALLOC
3861#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003862
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003863static PyObject *
3864string_getnewargs(PyStringObject *v)
3865{
3866 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3867}
3868
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003869
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003870static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003871string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003872 /* Counterparts of the obsolete stropmodule functions; except
3873 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003874 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3875 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003876 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003877 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3878 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003879 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3880 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3881 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3882 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3883 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3884 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3885 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003886 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3887 capitalize__doc__},
3888 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3889 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3890 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003891 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003892 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3893 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3894 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3895 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3896 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3897 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3898 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003899 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3900 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003901 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3902 startswith__doc__},
3903 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3904 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3905 swapcase__doc__},
3906 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3907 translate__doc__},
3908 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3909 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3910 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3911 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3912 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3913 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3914 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3915 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3916 expandtabs__doc__},
3917 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3918 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003919 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003920 {NULL, NULL} /* sentinel */
3921};
3922
Jeremy Hylton938ace62002-07-17 16:30:39 +00003923static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003924str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3925
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003926static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003927string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003928{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003929 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003930 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003931
Guido van Rossumae960af2001-08-30 03:11:59 +00003932 if (type != &PyString_Type)
3933 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003934 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3935 return NULL;
3936 if (x == NULL)
3937 return PyString_FromString("");
3938 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003939}
3940
Guido van Rossumae960af2001-08-30 03:11:59 +00003941static PyObject *
3942str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3943{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003944 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003945 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003946
3947 assert(PyType_IsSubtype(type, &PyString_Type));
3948 tmp = string_new(&PyString_Type, args, kwds);
3949 if (tmp == NULL)
3950 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003951 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003952 n = PyString_GET_SIZE(tmp);
3953 pnew = type->tp_alloc(type, n);
3954 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003955 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003956 ((PyStringObject *)pnew)->ob_shash =
3957 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003958 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003959 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003960 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003961 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003962}
3963
Guido van Rossumcacfc072002-05-24 19:01:59 +00003964static PyObject *
3965basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3966{
3967 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003968 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003969 return NULL;
3970}
3971
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003972static PyObject *
3973string_mod(PyObject *v, PyObject *w)
3974{
3975 if (!PyString_Check(v)) {
3976 Py_INCREF(Py_NotImplemented);
3977 return Py_NotImplemented;
3978 }
3979 return PyString_Format(v, w);
3980}
3981
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003982PyDoc_STRVAR(basestring_doc,
3983"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003984
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003985static PyNumberMethods string_as_number = {
3986 0, /*nb_add*/
3987 0, /*nb_subtract*/
3988 0, /*nb_multiply*/
3989 0, /*nb_divide*/
3990 string_mod, /*nb_remainder*/
3991};
3992
3993
Guido van Rossumcacfc072002-05-24 19:01:59 +00003994PyTypeObject PyBaseString_Type = {
3995 PyObject_HEAD_INIT(&PyType_Type)
3996 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003997 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003998 0,
3999 0,
4000 0, /* tp_dealloc */
4001 0, /* tp_print */
4002 0, /* tp_getattr */
4003 0, /* tp_setattr */
4004 0, /* tp_compare */
4005 0, /* tp_repr */
4006 0, /* tp_as_number */
4007 0, /* tp_as_sequence */
4008 0, /* tp_as_mapping */
4009 0, /* tp_hash */
4010 0, /* tp_call */
4011 0, /* tp_str */
4012 0, /* tp_getattro */
4013 0, /* tp_setattro */
4014 0, /* tp_as_buffer */
4015 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4016 basestring_doc, /* tp_doc */
4017 0, /* tp_traverse */
4018 0, /* tp_clear */
4019 0, /* tp_richcompare */
4020 0, /* tp_weaklistoffset */
4021 0, /* tp_iter */
4022 0, /* tp_iternext */
4023 0, /* tp_methods */
4024 0, /* tp_members */
4025 0, /* tp_getset */
4026 &PyBaseObject_Type, /* tp_base */
4027 0, /* tp_dict */
4028 0, /* tp_descr_get */
4029 0, /* tp_descr_set */
4030 0, /* tp_dictoffset */
4031 0, /* tp_init */
4032 0, /* tp_alloc */
4033 basestring_new, /* tp_new */
4034 0, /* tp_free */
4035};
4036
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004037PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004038"str(object) -> string\n\
4039\n\
4040Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004041If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004042
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004043PyTypeObject PyString_Type = {
4044 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004045 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004046 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004047 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004048 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004049 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004050 (printfunc)string_print, /* tp_print */
4051 0, /* tp_getattr */
4052 0, /* tp_setattr */
4053 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004054 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004055 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004056 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004057 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004058 (hashfunc)string_hash, /* tp_hash */
4059 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004060 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004061 PyObject_GenericGetAttr, /* tp_getattro */
4062 0, /* tp_setattro */
4063 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004064 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004065 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004066 string_doc, /* tp_doc */
4067 0, /* tp_traverse */
4068 0, /* tp_clear */
4069 (richcmpfunc)string_richcompare, /* tp_richcompare */
4070 0, /* tp_weaklistoffset */
4071 0, /* tp_iter */
4072 0, /* tp_iternext */
4073 string_methods, /* tp_methods */
4074 0, /* tp_members */
4075 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004076 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004077 0, /* tp_dict */
4078 0, /* tp_descr_get */
4079 0, /* tp_descr_set */
4080 0, /* tp_dictoffset */
4081 0, /* tp_init */
4082 0, /* tp_alloc */
4083 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004084 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004085};
4086
4087void
Fred Drakeba096332000-07-09 07:04:36 +00004088PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004089{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004090 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004091 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004092 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004093 if (w == NULL || !PyString_Check(*pv)) {
4094 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004095 *pv = NULL;
4096 return;
4097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004098 v = string_concat((PyStringObject *) *pv, w);
4099 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004100 *pv = v;
4101}
4102
Guido van Rossum013142a1994-08-30 08:19:36 +00004103void
Fred Drakeba096332000-07-09 07:04:36 +00004104PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004105{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004106 PyString_Concat(pv, w);
4107 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004108}
4109
4110
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004111/* The following function breaks the notion that strings are immutable:
4112 it changes the size of a string. We get away with this only if there
4113 is only one module referencing the object. You can also think of it
4114 as creating a new string object and destroying the old one, only
4115 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004116 already be known to some other part of the code...
4117 Note that if there's not enough memory to resize the string, the original
4118 string object at *pv is deallocated, *pv is set to NULL, an "out of
4119 memory" exception is set, and -1 is returned. Else (on success) 0 is
4120 returned, and the value in *pv may or may not be the same as on input.
4121 As always, an extra byte is allocated for a trailing \0 byte (newsize
4122 does *not* include that), and a trailing \0 byte is stored.
4123*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004124
4125int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004126_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004127{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004128 register PyObject *v;
4129 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004130 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004131 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4132 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004133 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004134 Py_DECREF(v);
4135 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004136 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004137 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004138 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004139 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004140 _Py_ForgetReference(v);
4141 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004142 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004143 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004144 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004145 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004146 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004148 _Py_NewReference(*pv);
4149 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004150 sv->ob_size = newsize;
4151 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004152 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004153 return 0;
4154}
Guido van Rossume5372401993-03-16 12:15:04 +00004155
4156/* Helpers for formatstring */
4157
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004158Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004159getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004160{
Thomas Wouters977485d2006-02-16 15:59:12 +00004161 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004162 if (argidx < arglen) {
4163 (*p_argidx)++;
4164 if (arglen < 0)
4165 return args;
4166 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004167 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004168 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004169 PyErr_SetString(PyExc_TypeError,
4170 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004171 return NULL;
4172}
4173
/* Format flags.  Each flag is a distinct bit so that several of them can
 * be OR-ed together into one `flags' value; the character that selects a
 * flag in a format specifier is noted next to it.
 */
#define F_LJUST (1<<0)	/* '-' */
#define F_SIGN	(1<<1)	/* '+' */
#define F_BLANK (1<<2)	/* ' ' */
#define F_ALT	(1<<3)	/* '#' */
#define F_ZERO	(1<<4)	/* '0' */
4186
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004187Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004188formatfloat(char *buf, size_t buflen, int flags,
4189 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004190{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004191 /* fmt = '%#.' + `prec` + `type`
4192 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004193 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004194 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004195 x = PyFloat_AsDouble(v);
4196 if (x == -1.0 && PyErr_Occurred()) {
4197 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004198 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004199 }
Guido van Rossume5372401993-03-16 12:15:04 +00004200 if (prec < 0)
4201 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004202 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4203 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004204 /* Worst case length calc to ensure no buffer overrun:
4205
4206 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004207 fmt = %#.<prec>g
4208 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004209 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004210 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004211
4212 'f' formats:
4213 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4214 len = 1 + 50 + 1 + prec = 52 + prec
4215
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004216 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004217 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004218
4219 */
Georg Brandlc5db9232007-07-12 08:38:04 +00004220 if (((type == 'g' || type == 'G') &&
4221 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004222 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004223 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004224 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004225 return -1;
4226 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004227 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4228 (flags&F_ALT) ? "#" : "",
4229 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004230 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004231 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004232}
4233
Tim Peters38fd5b62000-09-21 05:43:11 +00004234/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4235 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4236 * Python's regular ints.
4237 * Return value: a new PyString*, or NULL if error.
4238 * . *pbuf is set to point into it,
4239 * *plen set to the # of chars following that.
4240 * Caller must decref it when done using pbuf.
4241 * The string starting at *pbuf is of the form
4242 * "-"? ("0x" | "0X")? digit+
4243 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004244 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004245 * There will be at least prec digits, zero-filled on the left if
4246 * necessary to get that many.
4247 * val object to be converted
4248 * flags bitmask of format flags; only F_ALT is looked at
4249 * prec minimum number of digits; 0-fill on left if needed
4250 * type a character in [duoxX]; u acts the same as d
4251 *
4252 * CAUTION: o, x and X conversions on regular ints can never
4253 * produce a '-' sign, but can for Python's unbounded ints.
4254 */
4255PyObject*
4256_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4257 char **pbuf, int *plen)
4258{
4259 PyObject *result = NULL;
4260 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004261 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004262 int sign; /* 1 if '-', else 0 */
4263 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004264 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004265 int numdigits; /* len == numnondigits + numdigits */
4266 int numnondigits = 0;
4267
4268 switch (type) {
4269 case 'd':
4270 case 'u':
4271 result = val->ob_type->tp_str(val);
4272 break;
4273 case 'o':
4274 result = val->ob_type->tp_as_number->nb_oct(val);
4275 break;
4276 case 'x':
4277 case 'X':
4278 numnondigits = 2;
4279 result = val->ob_type->tp_as_number->nb_hex(val);
4280 break;
4281 default:
4282 assert(!"'type' not in [duoxX]");
4283 }
4284 if (!result)
4285 return NULL;
4286
Neal Norwitz56423e52006-08-13 18:11:08 +00004287 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004288 if (!buf) {
4289 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004290 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004291 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004292
Tim Peters38fd5b62000-09-21 05:43:11 +00004293 /* To modify the string in-place, there can only be one reference. */
4294 if (result->ob_refcnt != 1) {
4295 PyErr_BadInternalCall();
4296 return NULL;
4297 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004298 llen = PyString_Size(result);
Armin Rigo4b63c212006-10-04 11:44:06 +00004299 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004300 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4301 return NULL;
4302 }
4303 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004304 if (buf[len-1] == 'L') {
4305 --len;
4306 buf[len] = '\0';
4307 }
4308 sign = buf[0] == '-';
4309 numnondigits += sign;
4310 numdigits = len - numnondigits;
4311 assert(numdigits > 0);
4312
Tim Petersfff53252001-04-12 18:38:48 +00004313 /* Get rid of base marker unless F_ALT */
4314 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004315 /* Need to skip 0x, 0X or 0. */
4316 int skipped = 0;
4317 switch (type) {
4318 case 'o':
4319 assert(buf[sign] == '0');
4320 /* If 0 is only digit, leave it alone. */
4321 if (numdigits > 1) {
4322 skipped = 1;
4323 --numdigits;
4324 }
4325 break;
4326 case 'x':
4327 case 'X':
4328 assert(buf[sign] == '0');
4329 assert(buf[sign + 1] == 'x');
4330 skipped = 2;
4331 numnondigits -= 2;
4332 break;
4333 }
4334 if (skipped) {
4335 buf += skipped;
4336 len -= skipped;
4337 if (sign)
4338 buf[0] = '-';
4339 }
4340 assert(len == numnondigits + numdigits);
4341 assert(numdigits > 0);
4342 }
4343
4344 /* Fill with leading zeroes to meet minimum width. */
4345 if (prec > numdigits) {
4346 PyObject *r1 = PyString_FromStringAndSize(NULL,
4347 numnondigits + prec);
4348 char *b1;
4349 if (!r1) {
4350 Py_DECREF(result);
4351 return NULL;
4352 }
4353 b1 = PyString_AS_STRING(r1);
4354 for (i = 0; i < numnondigits; ++i)
4355 *b1++ = *buf++;
4356 for (i = 0; i < prec - numdigits; i++)
4357 *b1++ = '0';
4358 for (i = 0; i < numdigits; i++)
4359 *b1++ = *buf++;
4360 *b1 = '\0';
4361 Py_DECREF(result);
4362 result = r1;
4363 buf = PyString_AS_STRING(result);
4364 len = numnondigits + prec;
4365 }
4366
4367 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004368 if (type == 'X') {
4369 /* Need to convert all lower case letters to upper case.
4370 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004371 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004372 if (buf[i] >= 'a' && buf[i] <= 'x')
4373 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004374 }
4375 *pbuf = buf;
4376 *plen = len;
4377 return result;
4378}
4379
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004380Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004381formatint(char *buf, size_t buflen, int flags,
4382 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004383{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004384 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004385 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4386 + 1 + 1 = 24 */
4387 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004388 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004389 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004390
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004391 x = PyInt_AsLong(v);
4392 if (x == -1 && PyErr_Occurred()) {
4393 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004394 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004395 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004396 if (x < 0 && type == 'u') {
4397 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004398 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004399 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4400 sign = "-";
4401 else
4402 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004403 if (prec < 0)
4404 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004405
4406 if ((flags & F_ALT) &&
4407 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004408 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004409 * of issues that cause pain:
4410 * - when 0 is being converted, the C standard leaves off
4411 * the '0x' or '0X', which is inconsistent with other
4412 * %#x/%#X conversions and inconsistent with Python's
4413 * hex() function
4414 * - there are platforms that violate the standard and
4415 * convert 0 with the '0x' or '0X'
4416 * (Metrowerks, Compaq Tru64)
4417 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004418 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004419 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004420 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004421 * We can achieve the desired consistency by inserting our
4422 * own '0x' or '0X' prefix, and substituting %x/%X in place
4423 * of %#x/%#X.
4424 *
4425 * Note that this is the same approach as used in
4426 * formatint() in unicodeobject.c
4427 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004428 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4429 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004430 }
4431 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004432 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4433 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004434 prec, type);
4435 }
4436
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004437 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4438 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004439 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004440 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004441 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004442 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004443 return -1;
4444 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004445 if (sign[0])
4446 PyOS_snprintf(buf, buflen, fmt, -x);
4447 else
4448 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004449 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004450}
4451
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004452Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004453formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004454{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004455 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004456 if (PyString_Check(v)) {
4457 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004458 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004459 }
4460 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004461 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004462 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004463 }
4464 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004465 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004466}
4467
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which floats, ints
   and chars are formatted.  XXX This is a magic number.  Each formatting
   routine does its own bounds checking to ensure no overflow, but a
   better solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004477
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004478PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004479PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004480{
4481 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004482 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004483 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004484 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004485 PyObject *result, *orig_args;
4486#ifdef Py_USING_UNICODE
4487 PyObject *v, *w;
4488#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004489 PyObject *dict = NULL;
4490 if (format == NULL || !PyString_Check(format) || args == NULL) {
4491 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004492 return NULL;
4493 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004494 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004495 fmt = PyString_AS_STRING(format);
4496 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004497 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004498 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004499 if (result == NULL)
4500 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004501 res = PyString_AsString(result);
4502 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004503 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004504 argidx = 0;
4505 }
4506 else {
4507 arglen = -1;
4508 argidx = -2;
4509 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004510 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4511 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004512 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004513 while (--fmtcnt >= 0) {
4514 if (*fmt != '%') {
4515 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004516 rescnt = fmtcnt + 100;
4517 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004518 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004519 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004520 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004521 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004522 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004523 }
4524 *res++ = *fmt++;
4525 }
4526 else {
4527 /* Got a format specifier */
4528 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004529 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004530 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004531 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004532 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004533 PyObject *v = NULL;
4534 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004535 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004536 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004537 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004538 char formatbuf[FORMATBUFLEN];
4539 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004540#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004541 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004542 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004543#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004544
Guido van Rossumda9c2711996-12-05 21:58:58 +00004545 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004546 if (*fmt == '(') {
4547 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004548 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004549 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004550 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004551
4552 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004553 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004554 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004555 goto error;
4556 }
4557 ++fmt;
4558 --fmtcnt;
4559 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004560 /* Skip over balanced parentheses */
4561 while (pcount > 0 && --fmtcnt >= 0) {
4562 if (*fmt == ')')
4563 --pcount;
4564 else if (*fmt == '(')
4565 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004566 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004567 }
4568 keylen = fmt - keystart - 1;
4569 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004570 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004571 "incomplete format key");
4572 goto error;
4573 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004574 key = PyString_FromStringAndSize(keystart,
4575 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004576 if (key == NULL)
4577 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004578 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004579 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004580 args_owned = 0;
4581 }
4582 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004583 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004584 if (args == NULL) {
4585 goto error;
4586 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004587 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004588 arglen = -1;
4589 argidx = -2;
4590 }
Guido van Rossume5372401993-03-16 12:15:04 +00004591 while (--fmtcnt >= 0) {
4592 switch (c = *fmt++) {
4593 case '-': flags |= F_LJUST; continue;
4594 case '+': flags |= F_SIGN; continue;
4595 case ' ': flags |= F_BLANK; continue;
4596 case '#': flags |= F_ALT; continue;
4597 case '0': flags |= F_ZERO; continue;
4598 }
4599 break;
4600 }
4601 if (c == '*') {
4602 v = getnextarg(args, arglen, &argidx);
4603 if (v == NULL)
4604 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004605 if (!PyInt_Check(v)) {
4606 PyErr_SetString(PyExc_TypeError,
4607 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004608 goto error;
4609 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004610 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004611 if (width < 0) {
4612 flags |= F_LJUST;
4613 width = -width;
4614 }
Guido van Rossume5372401993-03-16 12:15:04 +00004615 if (--fmtcnt >= 0)
4616 c = *fmt++;
4617 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004618 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004619 width = c - '0';
4620 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004621 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004622 if (!isdigit(c))
4623 break;
4624 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004625 PyErr_SetString(
4626 PyExc_ValueError,
4627 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004628 goto error;
4629 }
4630 width = width*10 + (c - '0');
4631 }
4632 }
4633 if (c == '.') {
4634 prec = 0;
4635 if (--fmtcnt >= 0)
4636 c = *fmt++;
4637 if (c == '*') {
4638 v = getnextarg(args, arglen, &argidx);
4639 if (v == NULL)
4640 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004641 if (!PyInt_Check(v)) {
4642 PyErr_SetString(
4643 PyExc_TypeError,
4644 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004645 goto error;
4646 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004647 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004648 if (prec < 0)
4649 prec = 0;
4650 if (--fmtcnt >= 0)
4651 c = *fmt++;
4652 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004653 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004654 prec = c - '0';
4655 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004656 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004657 if (!isdigit(c))
4658 break;
4659 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004660 PyErr_SetString(
4661 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004662 "prec too big");
4663 goto error;
4664 }
4665 prec = prec*10 + (c - '0');
4666 }
4667 }
4668 } /* prec */
4669 if (fmtcnt >= 0) {
4670 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004671 if (--fmtcnt >= 0)
4672 c = *fmt++;
4673 }
4674 }
4675 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004676 PyErr_SetString(PyExc_ValueError,
4677 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004678 goto error;
4679 }
4680 if (c != '%') {
4681 v = getnextarg(args, arglen, &argidx);
4682 if (v == NULL)
4683 goto error;
4684 }
4685 sign = 0;
4686 fill = ' ';
4687 switch (c) {
4688 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004689 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004690 len = 1;
4691 break;
4692 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004693#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004694 if (PyUnicode_Check(v)) {
4695 fmt = fmt_start;
4696 argidx = argidx_start;
4697 goto unicode;
4698 }
Georg Brandld45014b2005-10-01 17:06:00 +00004699#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004700 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004701#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004702 if (temp != NULL && PyUnicode_Check(temp)) {
4703 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004704 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004705 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004706 goto unicode;
4707 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004708#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004709 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004710 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004711 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004712 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004713 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004714 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004715 if (!PyString_Check(temp)) {
4716 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004717 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004718 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004719 goto error;
4720 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004721 pbuf = PyString_AS_STRING(temp);
4722 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004723 if (prec >= 0 && len > prec)
4724 len = prec;
4725 break;
4726 case 'i':
4727 case 'd':
4728 case 'u':
4729 case 'o':
4730 case 'x':
4731 case 'X':
4732 if (c == 'i')
4733 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004734 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004735 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004736 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004737 prec, c, &pbuf, &ilen);
4738 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004739 if (!temp)
4740 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004741 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004742 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004743 else {
4744 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004745 len = formatint(pbuf,
4746 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004747 flags, prec, c, v);
4748 if (len < 0)
4749 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004750 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004751 }
4752 if (flags & F_ZERO)
4753 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004754 break;
4755 case 'e':
4756 case 'E':
4757 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004758 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004759 case 'g':
4760 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004761 if (c == 'F')
4762 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004763 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004764 len = formatfloat(pbuf, sizeof(formatbuf),
4765 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004766 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004767 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004768 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004769 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004770 fill = '0';
4771 break;
4772 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004773#ifdef Py_USING_UNICODE
4774 if (PyUnicode_Check(v)) {
4775 fmt = fmt_start;
4776 argidx = argidx_start;
4777 goto unicode;
4778 }
4779#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004780 pbuf = formatbuf;
4781 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004782 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004783 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004784 break;
4785 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004786 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004787 "unsupported format character '%c' (0x%x) "
Armin Rigo4b63c212006-10-04 11:44:06 +00004788 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004789 c, c,
Armin Rigo4b63c212006-10-04 11:44:06 +00004790 (Py_ssize_t)(fmt - 1 -
4791 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004792 goto error;
4793 }
4794 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004795 if (*pbuf == '-' || *pbuf == '+') {
4796 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004797 len--;
4798 }
4799 else if (flags & F_SIGN)
4800 sign = '+';
4801 else if (flags & F_BLANK)
4802 sign = ' ';
4803 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004804 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004805 }
4806 if (width < len)
4807 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004808 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004809 reslen -= rescnt;
4810 rescnt = width + fmtcnt + 100;
4811 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004812 if (reslen < 0) {
4813 Py_DECREF(result);
Georg Brandl5f795862007-02-26 13:51:34 +00004814 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004815 return PyErr_NoMemory();
4816 }
Georg Brandl5f795862007-02-26 13:51:34 +00004817 if (_PyString_Resize(&result, reslen) < 0) {
4818 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004819 return NULL;
Georg Brandl5f795862007-02-26 13:51:34 +00004820 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004821 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004822 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004823 }
4824 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004825 if (fill != ' ')
4826 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004827 rescnt--;
4828 if (width > len)
4829 width--;
4830 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004831 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4832 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004833 assert(pbuf[1] == c);
4834 if (fill != ' ') {
4835 *res++ = *pbuf++;
4836 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004837 }
Tim Petersfff53252001-04-12 18:38:48 +00004838 rescnt -= 2;
4839 width -= 2;
4840 if (width < 0)
4841 width = 0;
4842 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004843 }
4844 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004845 do {
4846 --rescnt;
4847 *res++ = fill;
4848 } while (--width > len);
4849 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004850 if (fill == ' ') {
4851 if (sign)
4852 *res++ = sign;
4853 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004854 (c == 'x' || c == 'X')) {
4855 assert(pbuf[0] == '0');
4856 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004857 *res++ = *pbuf++;
4858 *res++ = *pbuf++;
4859 }
4860 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004861 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004862 res += len;
4863 rescnt -= len;
4864 while (--width >= len) {
4865 --rescnt;
4866 *res++ = ' ';
4867 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004868 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004869 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004870 "not all arguments converted during string formatting");
Georg Brandl5f795862007-02-26 13:51:34 +00004871 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004872 goto error;
4873 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004874 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004875 } /* '%' */
4876 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004877 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004878 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004879 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004880 goto error;
4881 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004882 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004883 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004884 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004885 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004886 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004887
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004888#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004889 unicode:
4890 if (args_owned) {
4891 Py_DECREF(args);
4892 args_owned = 0;
4893 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004894 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004895 if (PyTuple_Check(orig_args) && argidx > 0) {
4896 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004897 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004898 v = PyTuple_New(n);
4899 if (v == NULL)
4900 goto error;
4901 while (--n >= 0) {
4902 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4903 Py_INCREF(w);
4904 PyTuple_SET_ITEM(v, n, w);
4905 }
4906 args = v;
4907 } else {
4908 Py_INCREF(orig_args);
4909 args = orig_args;
4910 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004911 args_owned = 1;
4912 /* Take what we have of the result and let the Unicode formatting
4913 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004914 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004915 if (_PyString_Resize(&result, rescnt))
4916 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004917 fmtcnt = PyString_GET_SIZE(format) - \
4918 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004919 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4920 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004921 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004922 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004923 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004924 if (v == NULL)
4925 goto error;
4926 /* Paste what we have (result) to what the Unicode formatting
4927 function returned (v) and return the result (or error) */
4928 w = PyUnicode_Concat(result, v);
4929 Py_DECREF(result);
4930 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004931 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004932 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004933#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004934
Guido van Rossume5372401993-03-16 12:15:04 +00004935 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004936 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004937 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004938 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004939 }
Guido van Rossume5372401993-03-16 12:15:04 +00004940 return NULL;
4941}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004942
Guido van Rossum2a61e741997-01-18 07:55:05 +00004943void
Fred Drakeba096332000-07-09 07:04:36 +00004944PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004945{
4946 register PyStringObject *s = (PyStringObject *)(*p);
4947 PyObject *t;
4948 if (s == NULL || !PyString_Check(s))
4949 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004950 /* If it's a string subclass, we don't really know what putting
4951 it in the interned dict might do. */
4952 if (!PyString_CheckExact(s))
4953 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004954 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004955 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004956 if (interned == NULL) {
4957 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004958 if (interned == NULL) {
4959 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004960 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004961 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004962 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004963 t = PyDict_GetItem(interned, (PyObject *)s);
4964 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004965 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004966 Py_DECREF(*p);
4967 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004968 return;
4969 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004970
Armin Rigo79f7ad22004-08-07 19:27:39 +00004971 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004972 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004973 return;
4974 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004975 /* The two references in interned are not counted by refcnt.
4976 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004977 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004978 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004979}
4980
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004981void
4982PyString_InternImmortal(PyObject **p)
4983{
4984 PyString_InternInPlace(p);
4985 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4986 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4987 Py_INCREF(*p);
4988 }
4989}
4990
Guido van Rossum2a61e741997-01-18 07:55:05 +00004991
4992PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004993PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004994{
4995 PyObject *s = PyString_FromString(cp);
4996 if (s == NULL)
4997 return NULL;
4998 PyString_InternInPlace(&s);
4999 return s;
5000}
5001
Guido van Rossum8cf04761997-08-02 02:57:45 +00005002void
Fred Drakeba096332000-07-09 07:04:36 +00005003PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005004{
5005 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005006 for (i = 0; i < UCHAR_MAX + 1; i++) {
5007 Py_XDECREF(characters[i]);
5008 characters[i] = NULL;
5009 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005010 Py_XDECREF(nullstring);
5011 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005012}
Barry Warsawa903ad982001-02-23 16:40:48 +00005013
Barry Warsawa903ad982001-02-23 16:40:48 +00005014void _Py_ReleaseInternedStrings(void)
5015{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005016 PyObject *keys;
5017 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005018 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005019
5020 if (interned == NULL || !PyDict_Check(interned))
5021 return;
5022 keys = PyDict_Keys(interned);
5023 if (keys == NULL || !PyList_Check(keys)) {
5024 PyErr_Clear();
5025 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005026 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005027
5028 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5029 detector, interned strings are not forcibly deallocated; rather, we
5030 give them their stolen references back, and then clear and DECREF
5031 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005032
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005033 fprintf(stderr, "releasing interned strings\n");
5034 n = PyList_GET_SIZE(keys);
5035 for (i = 0; i < n; i++) {
5036 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5037 switch (s->ob_sstate) {
5038 case SSTATE_NOT_INTERNED:
5039 /* XXX Shouldn't happen */
5040 break;
5041 case SSTATE_INTERNED_IMMORTAL:
5042 s->ob_refcnt += 1;
5043 break;
5044 case SSTATE_INTERNED_MORTAL:
5045 s->ob_refcnt += 2;
5046 break;
5047 default:
5048 Py_FatalError("Inconsistent interned string state.");
5049 }
5050 s->ob_sstate = SSTATE_NOT_INTERNED;
5051 }
5052 Py_DECREF(keys);
5053 PyDict_Clear(interned);
5054 Py_DECREF(interned);
5055 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005056}