blob: 9f154f3e1e4679f788171ac6708d7a99d1197471 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Eric Smitha9f7d622008-02-17 19:46:49 +00007#include "formatter_string.h"
8
Guido van Rossum013142a1994-08-30 08:19:36 +00009#include <ctype.h>
10
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000011#ifdef COUNT_ALLOCS
12int null_strings, one_strings;
13#endif
14
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
Tim Petersae1d0c92006-03-17 03:29:34 +000023 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000024 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000028/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000029 For both PyString_FromString() and PyString_FromStringAndSize(), the
30 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000033 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000034 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000035
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000036 For PyString_FromStringAndSize(), the parameter the parameter `str' is
37 either NULL or else points to a string containing at least `size' bytes.
38 For PyString_FromStringAndSize(), the string in the `str' parameter does
39 not have to be null-terminated. (Therefore it is safe to construct a
40 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
41 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
42 bytes (setting the last byte to the null terminating character) and you can
43 fill in the data yourself. If `str' is non-NULL then the resulting
44 PyString object must be treated as immutable and you must not fill in nor
45 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000046
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000047 The PyObject member `op->ob_size', which denotes the number of "extra
48 items" in a variable-size object, will contain the number of bytes
49 allocated for string data, not counting the null terminating character. It
50 is therefore equal to the equal to the `size' parameter (for
51 PyString_FromStringAndSize()) or the length of the string in the `str'
52 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000053*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000055PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Tim Peters9e897f42001-05-09 07:37:07 +000057 register PyStringObject *op;
Gregory P. Smithc00eb732008-04-09 23:16:37 +000058 if (size < 0) {
59 PyErr_SetString(PyExc_SystemError,
60 "Negative size passed to PyString_FromStringAndSize");
61 return NULL;
62 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 if (size == 0 && (op = nullstring) != NULL) {
64#ifdef COUNT_ALLOCS
65 null_strings++;
66#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 Py_INCREF(op);
68 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 if (size == 1 && str != NULL &&
71 (op = characters[*str & UCHAR_MAX]) != NULL)
72 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073#ifdef COUNT_ALLOCS
74 one_strings++;
75#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 Py_INCREF(op);
77 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000079
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000080 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000081 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000083 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000084 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000086 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000088 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000089 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000090 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000098 PyObject *t = (PyObject *)op;
99 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000100 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105}
106
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000108PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109{
Tim Peters62de65b2001-12-06 20:29:32 +0000110 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000111 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000112
113 assert(str != NULL);
114 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000115 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000116 PyErr_SetString(PyExc_OverflowError,
117 "string is too long for a Python string");
118 return NULL;
119 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 if (size == 0 && (op = nullstring) != NULL) {
121#ifdef COUNT_ALLOCS
122 null_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
127 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
128#ifdef COUNT_ALLOCS
129 one_strings++;
130#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000135 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000136 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000137 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000139 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000141 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000142 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000143 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000151 PyObject *t = (PyObject *)op;
152 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000153 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000157 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000158}
159
Barry Warsawdadace02001-08-24 18:32:06 +0000160PyObject *
161PyString_FromFormatV(const char *format, va_list vargs)
162{
Tim Petersc15c4f12001-10-02 21:32:07 +0000163 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000164 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000165 const char* f;
166 char *s;
167 PyObject* string;
168
Tim Petersc15c4f12001-10-02 21:32:07 +0000169#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000170 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000171#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#ifdef __va_copy
173 __va_copy(count, vargs);
174#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000175 count = vargs;
176#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000177#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000178 /* step 1: figure out how large a buffer we need */
179 for (f = format; *f; f++) {
180 if (*f == '%') {
181 const char* p = f;
182 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
183 ;
184
Tim Peters8931ff12006-05-13 23:28:20 +0000185 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
186 * they don't affect the amount of space we reserve.
187 */
188 if ((*f == 'l' || *f == 'z') &&
189 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000190 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000191
Barry Warsawdadace02001-08-24 18:32:06 +0000192 switch (*f) {
193 case 'c':
194 (void)va_arg(count, int);
195 /* fall through... */
196 case '%':
197 n++;
198 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000199 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000200 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000201 /* 20 bytes is enough to hold a 64-bit
202 integer. Decimal takes the most space.
203 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000204 n += 20;
205 break;
206 case 's':
207 s = va_arg(count, char*);
208 n += strlen(s);
209 break;
210 case 'p':
211 (void) va_arg(count, int);
212 /* maximum 64-bit pointer representation:
213 * 0xffffffffffffffff
214 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000215 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000216 */
217 n += 19;
218 break;
219 default:
220 /* if we stumble upon an unknown
221 formatting code, copy the rest of
222 the format string to the output
223 string. (we cannot just skip the
224 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000225 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000226 n += strlen(p);
227 goto expand;
228 }
229 } else
230 n++;
231 }
232 expand:
233 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000234 /* Since we've analyzed how much space we need for the worst case,
235 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000236 string = PyString_FromStringAndSize(NULL, n);
237 if (!string)
238 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000239
Barry Warsawdadace02001-08-24 18:32:06 +0000240 s = PyString_AsString(string);
241
242 for (f = format; *f; f++) {
243 if (*f == '%') {
244 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000245 Py_ssize_t i;
246 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000247 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000248 /* parse the width.precision part (we're only
249 interested in the precision value, if any) */
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 if (*f == '.') {
254 f++;
255 n = 0;
256 while (isdigit(Py_CHARMASK(*f)))
257 n = (n*10) + *f++ - '0';
258 }
259 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
260 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000261 /* handle the long flag, but only for %ld and %lu.
262 others can be added when necessary. */
263 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000264 longflag = 1;
265 ++f;
266 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000267 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000268 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000269 size_tflag = 1;
270 ++f;
271 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000272
Barry Warsawdadace02001-08-24 18:32:06 +0000273 switch (*f) {
274 case 'c':
275 *s++ = va_arg(vargs, int);
276 break;
277 case 'd':
278 if (longflag)
279 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000280 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000281 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
282 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000283 else
284 sprintf(s, "%d", va_arg(vargs, int));
285 s += strlen(s);
286 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000287 case 'u':
288 if (longflag)
289 sprintf(s, "%lu",
290 va_arg(vargs, unsigned long));
291 else if (size_tflag)
292 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
293 va_arg(vargs, size_t));
294 else
295 sprintf(s, "%u",
296 va_arg(vargs, unsigned int));
297 s += strlen(s);
298 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000299 case 'i':
300 sprintf(s, "%i", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 'x':
304 sprintf(s, "%x", va_arg(vargs, int));
305 s += strlen(s);
306 break;
307 case 's':
308 p = va_arg(vargs, char*);
309 i = strlen(p);
310 if (n > 0 && i > n)
311 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000312 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000313 s += i;
314 break;
315 case 'p':
316 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000317 /* %p is ill-defined: ensure leading 0x. */
318 if (s[1] == 'X')
319 s[1] = 'x';
320 else if (s[1] != 'x') {
321 memmove(s+2, s, strlen(s)+1);
322 s[0] = '0';
323 s[1] = 'x';
324 }
Barry Warsawdadace02001-08-24 18:32:06 +0000325 s += strlen(s);
326 break;
327 case '%':
328 *s++ = '%';
329 break;
330 default:
331 strcpy(s, p);
332 s += strlen(s);
333 goto end;
334 }
335 } else
336 *s++ = *f;
337 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000340 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000341 return string;
342}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000343
Barry Warsawdadace02001-08-24 18:32:06 +0000344PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000345PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000346{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000347 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000348 va_list vargs;
349
350#ifdef HAVE_STDARG_PROTOTYPES
351 va_start(vargs, format);
352#else
353 va_start(vargs);
354#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000355 ret = PyString_FromFormatV(format, vargs);
356 va_end(vargs);
357 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000358}
359
360
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000361PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000362 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000363 const char *encoding,
364 const char *errors)
365{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000366 PyObject *v, *str;
367
368 str = PyString_FromStringAndSize(s, size);
369 if (str == NULL)
370 return NULL;
371 v = PyString_AsDecodedString(str, encoding, errors);
372 Py_DECREF(str);
373 return v;
374}
375
376PyObject *PyString_AsDecodedObject(PyObject *str,
377 const char *encoding,
378 const char *errors)
379{
380 PyObject *v;
381
382 if (!PyString_Check(str)) {
383 PyErr_BadArgument();
384 goto onError;
385 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000386
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387 if (encoding == NULL) {
388#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000390#else
391 PyErr_SetString(PyExc_ValueError, "no encoding specified");
392 goto onError;
393#endif
394 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395
396 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000397 v = PyCodec_Decode(str, encoding, errors);
398 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000400
401 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000402
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000404 return NULL;
405}
406
407PyObject *PyString_AsDecodedString(PyObject *str,
408 const char *encoding,
409 const char *errors)
410{
411 PyObject *v;
412
413 v = PyString_AsDecodedObject(str, encoding, errors);
414 if (v == NULL)
415 goto onError;
416
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000417#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 /* Convert Unicode to a string using the default encoding */
419 if (PyUnicode_Check(v)) {
420 PyObject *temp = v;
421 v = PyUnicode_AsEncodedString(v, NULL, NULL);
422 Py_DECREF(temp);
423 if (v == NULL)
424 goto onError;
425 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000426#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000427 if (!PyString_Check(v)) {
428 PyErr_Format(PyExc_TypeError,
429 "decoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000430 Py_TYPE(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000431 Py_DECREF(v);
432 goto onError;
433 }
434
435 return v;
436
437 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 return NULL;
439}
440
441PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000442 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 const char *encoding,
444 const char *errors)
445{
446 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000447
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000448 str = PyString_FromStringAndSize(s, size);
449 if (str == NULL)
450 return NULL;
451 v = PyString_AsEncodedString(str, encoding, errors);
452 Py_DECREF(str);
453 return v;
454}
455
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 const char *encoding,
458 const char *errors)
459{
460 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000461
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 if (!PyString_Check(str)) {
463 PyErr_BadArgument();
464 goto onError;
465 }
466
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467 if (encoding == NULL) {
468#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000470#else
471 PyErr_SetString(PyExc_ValueError, "no encoding specified");
472 goto onError;
473#endif
474 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475
476 /* Encode via the codec registry */
477 v = PyCodec_Encode(str, encoding, errors);
478 if (v == NULL)
479 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000480
481 return v;
482
483 onError:
484 return NULL;
485}
486
487PyObject *PyString_AsEncodedString(PyObject *str,
488 const char *encoding,
489 const char *errors)
490{
491 PyObject *v;
492
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000493 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494 if (v == NULL)
495 goto onError;
496
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000497#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000498 /* Convert Unicode to a string using the default encoding */
499 if (PyUnicode_Check(v)) {
500 PyObject *temp = v;
501 v = PyUnicode_AsEncodedString(v, NULL, NULL);
502 Py_DECREF(temp);
503 if (v == NULL)
504 goto onError;
505 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000506#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000507 if (!PyString_Check(v)) {
508 PyErr_Format(PyExc_TypeError,
509 "encoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000510 Py_TYPE(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 Py_DECREF(v);
512 goto onError;
513 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000514
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000515 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000516
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000517 onError:
518 return NULL;
519}
520
Guido van Rossum234f9421993-06-17 12:35:49 +0000521static void
Fred Drakeba096332000-07-09 07:04:36 +0000522string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000524 switch (PyString_CHECK_INTERNED(op)) {
525 case SSTATE_NOT_INTERNED:
526 break;
527
528 case SSTATE_INTERNED_MORTAL:
529 /* revive dead object temporarily for DelItem */
Christian Heimese93237d2007-12-19 02:37:44 +0000530 Py_REFCNT(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000531 if (PyDict_DelItem(interned, op) != 0)
532 Py_FatalError(
533 "deletion of interned string failed");
534 break;
535
536 case SSTATE_INTERNED_IMMORTAL:
537 Py_FatalError("Immortal interned string died.");
538
539 default:
540 Py_FatalError("Inconsistent interned string state.");
541 }
Christian Heimese93237d2007-12-19 02:37:44 +0000542 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000543}
544
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545/* Unescape a backslash-escaped string. If unicode is non-zero,
546 the string is a u-literal. If recode_encoding is non-zero,
547 the string is UTF-8 encoded and should be re-encoded in the
548 specified encoding. */
549
550PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000551 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000552 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 const char *recode_encoding)
555{
556 int c;
557 char *p, *buf;
558 const char *end;
559 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000560 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000561 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000562 if (v == NULL)
563 return NULL;
564 p = buf = PyString_AsString(v);
565 end = s + len;
566 while (s < end) {
567 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000568 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569#ifdef Py_USING_UNICODE
570 if (recode_encoding && (*s & 0x80)) {
571 PyObject *u, *w;
572 char *r;
573 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000574 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575 t = s;
576 /* Decode non-ASCII bytes as UTF-8. */
577 while (t < end && (*t & 0x80)) t++;
578 u = PyUnicode_DecodeUTF8(s, t - s, errors);
579 if(!u) goto failed;
580
581 /* Recode them in target encoding. */
582 w = PyUnicode_AsEncodedString(
583 u, recode_encoding, errors);
584 Py_DECREF(u);
585 if (!w) goto failed;
586
587 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000588 assert(PyString_Check(w));
589 r = PyString_AS_STRING(w);
590 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000591 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000592 p += rn;
593 Py_DECREF(w);
594 s = t;
595 } else {
596 *p++ = *s++;
597 }
598#else
599 *p++ = *s++;
600#endif
601 continue;
602 }
603 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000604 if (s==end) {
605 PyErr_SetString(PyExc_ValueError,
606 "Trailing \\ in string");
607 goto failed;
608 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000609 switch (*s++) {
610 /* XXX This assumes ASCII! */
611 case '\n': break;
612 case '\\': *p++ = '\\'; break;
613 case '\'': *p++ = '\''; break;
614 case '\"': *p++ = '\"'; break;
615 case 'b': *p++ = '\b'; break;
616 case 'f': *p++ = '\014'; break; /* FF */
617 case 't': *p++ = '\t'; break;
618 case 'n': *p++ = '\n'; break;
619 case 'r': *p++ = '\r'; break;
620 case 'v': *p++ = '\013'; break; /* VT */
621 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
622 case '0': case '1': case '2': case '3':
623 case '4': case '5': case '6': case '7':
624 c = s[-1] - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000625 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000626 c = (c<<3) + *s++ - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000627 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 c = (c<<3) + *s++ - '0';
629 }
630 *p++ = c;
631 break;
632 case 'x':
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000633 if (s+1 < end &&
634 isxdigit(Py_CHARMASK(s[0])) &&
635 isxdigit(Py_CHARMASK(s[1])))
636 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000637 unsigned int x = 0;
638 c = Py_CHARMASK(*s);
639 s++;
640 if (isdigit(c))
641 x = c - '0';
642 else if (islower(c))
643 x = 10 + c - 'a';
644 else
645 x = 10 + c - 'A';
646 x = x << 4;
647 c = Py_CHARMASK(*s);
648 s++;
649 if (isdigit(c))
650 x += c - '0';
651 else if (islower(c))
652 x += 10 + c - 'a';
653 else
654 x += 10 + c - 'A';
655 *p++ = x;
656 break;
657 }
658 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000659 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000660 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000661 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000662 }
663 if (strcmp(errors, "replace") == 0) {
664 *p++ = '?';
665 } else if (strcmp(errors, "ignore") == 0)
666 /* do nothing */;
667 else {
668 PyErr_Format(PyExc_ValueError,
669 "decoding error; "
670 "unknown error handling code: %.400s",
671 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000672 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 }
674#ifndef Py_USING_UNICODE
675 case 'u':
676 case 'U':
677 case 'N':
678 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000679 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000680 "Unicode escapes not legal "
681 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000682 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684#endif
685 default:
686 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000687 s--;
688 goto non_esc; /* an arbitry number of unescaped
689 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000690 }
691 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000692 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000694 return v;
695 failed:
696 Py_DECREF(v);
697 return NULL;
698}
699
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000700/* -------------------------------------------------------------------- */
701/* object api */
702
Martin v. Löwis18e16552006-02-15 17:27:45 +0000703static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704string_getsize(register PyObject *op)
705{
706 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000707 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000708 if (PyString_AsStringAndSize(op, &s, &len))
709 return -1;
710 return len;
711}
712
713static /*const*/ char *
714string_getbuffer(register PyObject *op)
715{
716 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (PyString_AsStringAndSize(op, &s, &len))
719 return NULL;
720 return s;
721}
722
Martin v. Löwis18e16552006-02-15 17:27:45 +0000723Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getsize(op);
Christian Heimese93237d2007-12-19 02:37:44 +0000728 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
731/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000732PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000733{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000734 if (!PyString_Check(op))
735 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000736 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000737}
738
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000739int
740PyString_AsStringAndSize(register PyObject *obj,
741 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000742 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743{
744 if (s == NULL) {
745 PyErr_BadInternalCall();
746 return -1;
747 }
748
749 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000750#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 if (PyUnicode_Check(obj)) {
752 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
753 if (obj == NULL)
754 return -1;
755 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000756 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000757#endif
758 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000759 PyErr_Format(PyExc_TypeError,
760 "expected string or Unicode object, "
Christian Heimese93237d2007-12-19 02:37:44 +0000761 "%.200s found", Py_TYPE(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 return -1;
763 }
764 }
765
766 *s = PyString_AS_STRING(obj);
767 if (len != NULL)
768 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000769 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000770 PyErr_SetString(PyExc_TypeError,
771 "expected string without null bytes");
772 return -1;
773 }
774 return 0;
775}
776
Fredrik Lundhaf722372006-05-25 17:55:31 +0000777/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000778/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000779
Eric Smitha9f7d622008-02-17 19:46:49 +0000780#include "stringlib/stringdefs.h"
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Brett Cannon01531592007-09-17 03:28:34 +0000791 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000807 char *data = op->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +0000808 Py_ssize_t size = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000809 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000810 while (size > INT_MAX) {
811 /* Very long strings cannot be written atomically.
812 * But don't write exactly INT_MAX bytes at a time
813 * to avoid memory aligment issues.
814 */
815 const int chunk_size = INT_MAX & ~0x3FFF;
816 fwrite(data, 1, chunk_size, fp);
817 data += chunk_size;
818 size -= chunk_size;
819 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000820#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000821 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000822#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000823 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000824#endif
Brett Cannon01531592007-09-17 03:28:34 +0000825 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000826 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828
Thomas Wouters7e474022000-07-16 12:04:32 +0000829 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 quote = '\'';
Christian Heimese93237d2007-12-19 02:37:44 +0000831 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
832 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833 quote = '"';
834
Christian Heimese93237d2007-12-19 02:37:44 +0000835 str_len = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000836 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000838 for (i = 0; i < str_len; i++) {
839 /* Since strings are immutable and the caller should have a
840 reference, accessing the interal buffer should not be an issue
841 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000842 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000843 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000845 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\r");
851 else if (c < ' ' || c >= 0x7f)
852 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000853 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000854 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000856 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000857 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000858 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000859}
860
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000861PyObject *
862PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000863{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimese93237d2007-12-19 02:37:44 +0000865 size_t newsize = 2 + 4 * Py_SIZE(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000866 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +0000867 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000868 PyErr_SetString(PyExc_OverflowError,
869 "string is too large to make repr");
Guido van Rossum9b847b42007-11-06 23:32:56 +0000870 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000871 }
872 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000874 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000875 }
876 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000877 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878 register char c;
879 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000880 int quote;
881
Thomas Wouters7e474022000-07-16 12:04:32 +0000882 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000883 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000884 if (smartquotes &&
Christian Heimese93237d2007-12-19 02:37:44 +0000885 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
886 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 quote = '"';
888
Tim Peters9161c8b2001-12-03 01:55:38 +0000889 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000890 *p++ = quote;
Christian Heimese93237d2007-12-19 02:37:44 +0000891 for (i = 0; i < Py_SIZE(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000892 /* There's at least enough room for a hex escape
893 and a closing quote. */
894 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000896 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000898 else if (c == '\t')
899 *p++ = '\\', *p++ = 't';
900 else if (c == '\n')
901 *p++ = '\\', *p++ = 'n';
902 else if (c == '\r')
903 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000904 else if (c < ' ' || c >= 0x7f) {
905 /* For performance, we don't want to call
906 PyOS_snprintf here (extra layers of
907 function call). */
908 sprintf(p, "\\x%02x", c & 0xff);
909 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000910 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000911 else
912 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000913 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000914 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000915 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000917 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000918 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000919 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000921}
922
Guido van Rossum189f1df2001-05-01 16:51:53 +0000923static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000924string_repr(PyObject *op)
925{
926 return PyString_Repr(op, 1);
927}
928
929static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000930string_str(PyObject *s)
931{
Tim Petersc9933152001-10-16 20:18:24 +0000932 assert(PyString_Check(s));
933 if (PyString_CheckExact(s)) {
934 Py_INCREF(s);
935 return s;
936 }
937 else {
938 /* Subtype -- return genuine string with the same value. */
939 PyStringObject *t = (PyStringObject *) s;
Christian Heimese93237d2007-12-19 02:37:44 +0000940 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Tim Petersc9933152001-10-16 20:18:24 +0000941 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000942}
943
Martin v. Löwis18e16552006-02-15 17:27:45 +0000944static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000945string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946{
Christian Heimese93237d2007-12-19 02:37:44 +0000947 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948}
949
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000950static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000951string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000952{
Andrew Dalke598710c2006-05-25 18:18:39 +0000953 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 register PyStringObject *op;
955 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000956#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000957 if (PyUnicode_Check(bb))
958 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000959#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000960 if (PyBytes_Check(bb))
961 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000962 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000963 "cannot concatenate 'str' and '%.200s' objects",
Christian Heimese93237d2007-12-19 02:37:44 +0000964 Py_TYPE(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 return NULL;
966 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 /* Optimize cases with empty left or right operand */
Christian Heimese93237d2007-12-19 02:37:44 +0000969 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000970 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimese93237d2007-12-19 02:37:44 +0000971 if (Py_SIZE(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000972 Py_INCREF(bb);
973 return bb;
974 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000975 Py_INCREF(a);
976 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977 }
Christian Heimese93237d2007-12-19 02:37:44 +0000978 size = Py_SIZE(a) + Py_SIZE(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000979 if (size < 0) {
980 PyErr_SetString(PyExc_OverflowError,
981 "strings are too large to concat");
982 return NULL;
983 }
984
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000985 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000986 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000987 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000989 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000990 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000991 op->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimese93237d2007-12-19 02:37:44 +0000992 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
993 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000994 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000995 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996#undef b
997}
998
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001000string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001002 register Py_ssize_t i;
1003 register Py_ssize_t j;
1004 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001006 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007 if (n < 0)
1008 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001009 /* watch out for overflows: the size can overflow int,
1010 * and the # of bytes needed can overflow size_t
1011 */
Christian Heimese93237d2007-12-19 02:37:44 +00001012 size = Py_SIZE(a) * n;
1013 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001014 PyErr_SetString(PyExc_OverflowError,
1015 "repeated string is too long");
1016 return NULL;
1017 }
Christian Heimese93237d2007-12-19 02:37:44 +00001018 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 Py_INCREF(a);
1020 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001021 }
Tim Peterse7c05322004-06-27 17:24:49 +00001022 nbytes = (size_t)size;
1023 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001024 PyErr_SetString(PyExc_OverflowError,
1025 "repeated string is too long");
1026 return NULL;
1027 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001028 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001029 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001030 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001031 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001032 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001033 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001034 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001035 op->ob_sval[size] = '\0';
Christian Heimese93237d2007-12-19 02:37:44 +00001036 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001037 memset(op->ob_sval, a->ob_sval[0] , n);
1038 return (PyObject *) op;
1039 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001040 i = 0;
1041 if (i < size) {
Christian Heimese93237d2007-12-19 02:37:44 +00001042 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1043 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001044 }
1045 while (i < size) {
1046 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001047 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001048 i += j;
1049 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001050 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051}
1052
1053/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1054
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001056string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001057 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001058 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059{
1060 if (i < 0)
1061 i = 0;
1062 if (j < 0)
1063 j = 0; /* Avoid signed/unsigned bug in next line */
Christian Heimese93237d2007-12-19 02:37:44 +00001064 if (j > Py_SIZE(a))
1065 j = Py_SIZE(a);
1066 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001067 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001068 Py_INCREF(a);
1069 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070 }
1071 if (j < i)
1072 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001074}
1075
Guido van Rossum9284a572000-03-07 15:53:43 +00001076static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001077string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001078{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001079 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001080#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 if (PyUnicode_Check(sub_obj))
1082 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001083#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001084 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001085 PyErr_Format(PyExc_TypeError,
1086 "'in <string>' requires string as left operand, "
Christian Heimese93237d2007-12-19 02:37:44 +00001087 "not %.200s", Py_TYPE(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001088 return -1;
1089 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001090 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001091
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001092 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001093}
1094
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001095static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001096string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001098 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +00001100 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001101 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001102 return NULL;
1103 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001104 pchar = a->ob_sval[i];
1105 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001106 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001107 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001108 else {
1109#ifdef COUNT_ALLOCS
1110 one_strings++;
1111#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001112 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001113 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001114 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115}
1116
Martin v. Löwiscd353062001-05-24 16:56:35 +00001117static PyObject*
1118string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001119{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001120 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001121 Py_ssize_t len_a, len_b;
1122 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 PyObject *result;
1124
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001125 /* Make sure both arguments are strings. */
1126 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001127 result = Py_NotImplemented;
1128 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001129 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001130 if (a == b) {
1131 switch (op) {
1132 case Py_EQ:case Py_LE:case Py_GE:
1133 result = Py_True;
1134 goto out;
1135 case Py_NE:case Py_LT:case Py_GT:
1136 result = Py_False;
1137 goto out;
1138 }
1139 }
1140 if (op == Py_EQ) {
1141 /* Supporting Py_NE here as well does not save
1142 much time, since Py_NE is rarely used. */
Christian Heimese93237d2007-12-19 02:37:44 +00001143 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001144 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimese93237d2007-12-19 02:37:44 +00001145 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001146 result = Py_True;
1147 } else {
1148 result = Py_False;
1149 }
1150 goto out;
1151 }
Christian Heimese93237d2007-12-19 02:37:44 +00001152 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001153 min_len = (len_a < len_b) ? len_a : len_b;
1154 if (min_len > 0) {
1155 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1156 if (c==0)
1157 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001158 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001159 c = 0;
1160 if (c == 0)
1161 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1162 switch (op) {
1163 case Py_LT: c = c < 0; break;
1164 case Py_LE: c = c <= 0; break;
1165 case Py_EQ: assert(0); break; /* unreachable */
1166 case Py_NE: c = c != 0; break;
1167 case Py_GT: c = c > 0; break;
1168 case Py_GE: c = c >= 0; break;
1169 default:
1170 result = Py_NotImplemented;
1171 goto out;
1172 }
1173 result = c ? Py_True : Py_False;
1174 out:
1175 Py_INCREF(result);
1176 return result;
1177}
1178
1179int
1180_PyString_Eq(PyObject *o1, PyObject *o2)
1181{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001182 PyStringObject *a = (PyStringObject*) o1;
1183 PyStringObject *b = (PyStringObject*) o2;
Christian Heimese93237d2007-12-19 02:37:44 +00001184 return Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001185 && *a->ob_sval == *b->ob_sval
Christian Heimese93237d2007-12-19 02:37:44 +00001186 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001187}
1188
Guido van Rossum9bfef441993-03-29 10:43:31 +00001189static long
Fred Drakeba096332000-07-09 07:04:36 +00001190string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001191{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001192 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001193 register unsigned char *p;
1194 register long x;
1195
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001196 if (a->ob_shash != -1)
1197 return a->ob_shash;
Christian Heimese93237d2007-12-19 02:37:44 +00001198 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001199 p = (unsigned char *) a->ob_sval;
1200 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001201 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001202 x = (1000003*x) ^ *p++;
Christian Heimese93237d2007-12-19 02:37:44 +00001203 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001204 if (x == -1)
1205 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001206 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001207 return x;
1208}
1209
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001210static PyObject*
1211string_subscript(PyStringObject* self, PyObject* item)
1212{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001213 if (PyIndex_Check(item)) {
1214 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001215 if (i == -1 && PyErr_Occurred())
1216 return NULL;
1217 if (i < 0)
1218 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001219 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 }
1221 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001222 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001223 char* source_buf;
1224 char* result_buf;
1225 PyObject* result;
1226
Tim Petersae1d0c92006-03-17 03:29:34 +00001227 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 PyString_GET_SIZE(self),
1229 &start, &stop, &step, &slicelength) < 0) {
1230 return NULL;
1231 }
1232
1233 if (slicelength <= 0) {
1234 return PyString_FromStringAndSize("", 0);
1235 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001236 else if (start == 0 && step == 1 &&
1237 slicelength == PyString_GET_SIZE(self) &&
1238 PyString_CheckExact(self)) {
1239 Py_INCREF(self);
1240 return (PyObject *)self;
1241 }
1242 else if (step == 1) {
1243 return PyString_FromStringAndSize(
1244 PyString_AS_STRING(self) + start,
1245 slicelength);
1246 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001247 else {
1248 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001249 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001250 if (result_buf == NULL)
1251 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001252
Tim Petersae1d0c92006-03-17 03:29:34 +00001253 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001254 cur += step, i++) {
1255 result_buf[i] = source_buf[cur];
1256 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001257
1258 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001259 slicelength);
1260 PyMem_Free(result_buf);
1261 return result;
1262 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001263 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001264 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001265 PyErr_Format(PyExc_TypeError,
1266 "string indices must be integers, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00001267 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001268 return NULL;
1269 }
1270}
1271
Martin v. Löwis18e16552006-02-15 17:27:45 +00001272static Py_ssize_t
1273string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274{
1275 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001276 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001277 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001278 return -1;
1279 }
1280 *ptr = (void *)self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001281 return Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001282}
1283
Martin v. Löwis18e16552006-02-15 17:27:45 +00001284static Py_ssize_t
1285string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001286{
Guido van Rossum045e6881997-09-08 18:30:11 +00001287 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001288 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001289 return -1;
1290}
1291
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292static Py_ssize_t
1293string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001294{
1295 if ( lenp )
Christian Heimese93237d2007-12-19 02:37:44 +00001296 *lenp = Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297 return 1;
1298}
1299
Martin v. Löwis18e16552006-02-15 17:27:45 +00001300static Py_ssize_t
1301string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001302{
1303 if ( index != 0 ) {
1304 PyErr_SetString(PyExc_SystemError,
1305 "accessing non-existent string segment");
1306 return -1;
1307 }
1308 *ptr = self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001309 return Py_SIZE(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001310}
1311
Christian Heimes1a6387e2008-03-26 12:49:49 +00001312static int
1313string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1314{
1315 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1316 0, flags);
1317}
1318
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001319static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001320 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001321 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001322 (ssizeargfunc)string_repeat, /*sq_repeat*/
1323 (ssizeargfunc)string_item, /*sq_item*/
1324 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001325 0, /*sq_ass_item*/
1326 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001327 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001328};
1329
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001330static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001331 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001332 (binaryfunc)string_subscript,
1333 0,
1334};
1335
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001336static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001337 (readbufferproc)string_buffer_getreadbuf,
1338 (writebufferproc)string_buffer_getwritebuf,
1339 (segcountproc)string_buffer_getsegcount,
1340 (charbufferproc)string_buffer_getcharbuf,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001341 (getbufferproc)string_buffer_getbuffer,
1342 0, /* XXX */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001343};
1344
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345
1346
/* Which end(s) of the string a strip operation works on. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-format strings, indexed by the strip kinds above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip kind i: the format string with its first three
   characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001355
Andrew Dalke525eab32006-05-26 14:00:45 +00001356
/* True if the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are
   compared directly and the rest with memcmp().
   Don't call if length < 2 (the memcmp length would be negative).
   Every argument is parenthesized so that expression arguments expand
   with the intended precedence; arguments are still evaluated more than
   once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                   \
  ((target)[(offset)] == (pattern)[0] &&                                   \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&               \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1362
1363
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one element
   more than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that an expression
   argument expands with the intended precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1376
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001377#define SPLIT_APPEND(data, left, right) \
1378 str = PyString_FromStringAndSize((data) + (left), \
1379 (right) - (left)); \
1380 if (str == NULL) \
1381 goto onError; \
1382 if (PyList_Append(list, str)) { \
1383 Py_DECREF(str); \
1384 goto onError; \
1385 } \
1386 else \
1387 Py_DECREF(str);
1388
/* Build a string from data[left:right] and store it as element `count` of
   `list`, then increment `count`.

   While count < MAX_PREALLOC the string is written straight into slot
   `count` with PyList_SET_ITEM and the local reference is not released;
   from MAX_PREALLOC onwards it is appended with PyList_Append and the local
   reference is released afterwards.

   The expanding function must provide `PyObject *str`, `PyObject *list`,
   an integer `count` and an `onError` label, which is the target on any
   failure. */
#define SPLIT_ADD(data, left, right) {                                  \
    str = PyString_FromStringAndSize((data) + (left),                   \
                                     (right) - (left));                 \
    if (str == NULL)                                                    \
        goto onError;                                                   \
    if (count >= MAX_PREALLOC) {                                        \
        if (PyList_Append(list, str) == 0) {                            \
            Py_DECREF(str);                                             \
        }                                                               \
        else {                                                          \
            Py_DECREF(str);                                             \
            goto onError;                                               \
        }                                                               \
    }                                                                   \
    else {                                                              \
        PyList_SET_ITEM(list, count, str);                              \
    }                                                                   \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001405
/* Always force the list to the expected size: set its length to the number
   of elements actually stored (`count` in the expanding function). */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Whitespace scanners over the byte buffer `s`, moving the index `i`.

   SKIP_SPACE / SKIP_NONSPACE advance `i` while it is below `len` and s[i]
   is (respectively is not) whitespace according to isspace().
   RSKIP_SPACE / RSKIP_NONSPACE move `i` backwards under the same tests and
   stop once `i` drops below 0.

   `i` must be a modifiable lvalue.  All arguments are parenthesized so that
   expression arguments (for example a pointer sum passed as `s`) expand as
   intended.  The brace-block form is kept so existing call sites expand the
   same way as before. */
#define SKIP_SPACE(s, i, len)    { while ((i) < (len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i) < (len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i) >= 0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1413
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001414Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001415split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001416{
Skip Montanaro26015492007-12-08 15:33:24 +00001417 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001418 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001419 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001420 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421
1422 if (list == NULL)
1423 return NULL;
1424
Andrew Dalke02758d62006-05-26 15:21:01 +00001425 i = j = 0;
1426
1427 while (maxsplit-- > 0) {
1428 SKIP_SPACE(s, i, len);
1429 if (i==len) break;
1430 j = i; i++;
1431 SKIP_NONSPACE(s, i, len);
Skip Montanaro26015492007-12-08 15:33:24 +00001432 if (j == 0 && i == len && PyString_CheckExact(self)) {
1433 /* No whitespace in self, so just use it as list[0] */
1434 Py_INCREF(self);
1435 PyList_SET_ITEM(list, 0, (PyObject *)self);
1436 count++;
1437 break;
1438 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001439 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001441
1442 if (i < len) {
1443 /* Only occurs when maxsplit was reached */
1444 /* Skip any remaining whitespace and copy to end of string */
1445 SKIP_SPACE(s, i, len);
1446 if (i != len)
1447 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001448 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001449 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001451 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452 Py_DECREF(list);
1453 return NULL;
1454}
1455
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001456Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001457split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001458{
Skip Montanaro26015492007-12-08 15:33:24 +00001459 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001460 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001461 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463
1464 if (list == NULL)
1465 return NULL;
1466
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001467 i = j = 0;
1468 while ((j < len) && (maxcount-- > 0)) {
1469 for(; j<len; j++) {
1470 /* I found that using memchr makes no difference */
1471 if (s[j] == ch) {
1472 SPLIT_ADD(s, i, j);
1473 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001474 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001475 }
1476 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001477 }
Skip Montanaro26015492007-12-08 15:33:24 +00001478 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1479 /* ch not in self, so just use self as list[0] */
1480 Py_INCREF(self);
1481 PyList_SET_ITEM(list, 0, (PyObject *)self);
1482 count++;
1483 }
1484 else if (i <= len) {
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001485 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001486 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001487 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001488 return list;
1489
1490 onError:
1491 Py_DECREF(list);
1492 return NULL;
1493}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001495PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496"S.split([sep [,maxsplit]]) -> list of strings\n\
1497\n\
1498Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001500splits are done. If sep is not specified or is None, any\n\
1501whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001502
1503static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001504string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001505{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001506 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001507 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001508 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001509 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001510#ifdef USE_FAST
1511 Py_ssize_t pos;
1512#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001513
Martin v. Löwis9c830762006-04-13 08:37:17 +00001514 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001516 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001517 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001518 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001519 return split_whitespace(self, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001520 if (PyString_Check(subobj)) {
1521 sub = PyString_AS_STRING(subobj);
1522 n = PyString_GET_SIZE(subobj);
1523 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001524#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001525 else if (PyUnicode_Check(subobj))
1526 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001527#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001528 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1529 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001530
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531 if (n == 0) {
1532 PyErr_SetString(PyExc_ValueError, "empty separator");
1533 return NULL;
1534 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001535 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001536 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001537
Andrew Dalke525eab32006-05-26 14:00:45 +00001538 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001539 if (list == NULL)
1540 return NULL;
1541
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001542#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001543 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001544 while (maxsplit-- > 0) {
1545 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1546 if (pos < 0)
1547 break;
1548 j = i+pos;
1549 SPLIT_ADD(s, i, j);
1550 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001552#else
1553 i = j = 0;
1554 while ((j+n <= len) && (maxsplit-- > 0)) {
1555 for (; j+n <= len; j++) {
1556 if (Py_STRING_MATCH(s, j, sub, n)) {
1557 SPLIT_ADD(s, i, j);
1558 i = j = j + n;
1559 break;
1560 }
1561 }
1562 }
1563#endif
1564 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001565 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566 return list;
1567
Andrew Dalke525eab32006-05-26 14:00:45 +00001568 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001569 Py_DECREF(list);
1570 return NULL;
1571}
1572
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001573PyDoc_STRVAR(partition__doc__,
1574"S.partition(sep) -> (head, sep, tail)\n\
1575\n\
1576Searches for the separator sep in S, and returns the part before it,\n\
1577the separator itself, and the part after it. If the separator is not\n\
1578found, returns S and two empty strings.");
1579
1580static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001581string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001582{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001583 const char *sep;
1584 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001585
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001586 if (PyString_Check(sep_obj)) {
1587 sep = PyString_AS_STRING(sep_obj);
1588 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001589 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001590#ifdef Py_USING_UNICODE
1591 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001592 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001593#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001594 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001595 return NULL;
1596
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001597 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001598 (PyObject*) self,
1599 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1600 sep_obj, sep, sep_len
1601 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001602}
1603
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001604PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001605"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001606\n\
1607Searches for the separator sep in S, starting at the end of S, and returns\n\
1608the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001609separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001610
1611static PyObject *
1612string_rpartition(PyStringObject *self, PyObject *sep_obj)
1613{
1614 const char *sep;
1615 Py_ssize_t sep_len;
1616
1617 if (PyString_Check(sep_obj)) {
1618 sep = PyString_AS_STRING(sep_obj);
1619 sep_len = PyString_GET_SIZE(sep_obj);
1620 }
1621#ifdef Py_USING_UNICODE
1622 else if (PyUnicode_Check(sep_obj))
1623 return PyUnicode_Partition((PyObject *) self, sep_obj);
1624#endif
1625 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1626 return NULL;
1627
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001628 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001629 (PyObject*) self,
1630 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1631 sep_obj, sep, sep_len
1632 );
1633}
1634
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001635Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001636rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001637{
Skip Montanaro26015492007-12-08 15:33:24 +00001638 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001639 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001640 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001641 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001642
1643 if (list == NULL)
1644 return NULL;
1645
Andrew Dalke02758d62006-05-26 15:21:01 +00001646 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001647
Andrew Dalke02758d62006-05-26 15:21:01 +00001648 while (maxsplit-- > 0) {
1649 RSKIP_SPACE(s, i);
1650 if (i<0) break;
1651 j = i; i--;
1652 RSKIP_NONSPACE(s, i);
Skip Montanaro26015492007-12-08 15:33:24 +00001653 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1654 /* No whitespace in self, so just use it as list[0] */
1655 Py_INCREF(self);
1656 PyList_SET_ITEM(list, 0, (PyObject *)self);
1657 count++;
1658 break;
1659 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001660 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001661 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001662 if (i >= 0) {
1663 /* Only occurs when maxsplit was reached */
1664 /* Skip any remaining whitespace and copy to beginning of string */
1665 RSKIP_SPACE(s, i);
1666 if (i >= 0)
1667 SPLIT_ADD(s, 0, i + 1);
1668
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001669 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001670 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001671 if (PyList_Reverse(list) < 0)
1672 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001673 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001674 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001675 Py_DECREF(list);
1676 return NULL;
1677}
1678
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001679Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001680rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001681{
Skip Montanaro26015492007-12-08 15:33:24 +00001682 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001683 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001684 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001685 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001686
1687 if (list == NULL)
1688 return NULL;
1689
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001690 i = j = len - 1;
1691 while ((i >= 0) && (maxcount-- > 0)) {
1692 for (; i >= 0; i--) {
1693 if (s[i] == ch) {
1694 SPLIT_ADD(s, i + 1, j + 1);
1695 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001696 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001697 }
1698 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001699 }
Skip Montanaro26015492007-12-08 15:33:24 +00001700 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1701 /* ch not in self, so just use self as list[0] */
1702 Py_INCREF(self);
1703 PyList_SET_ITEM(list, 0, (PyObject *)self);
1704 count++;
1705 }
1706 else if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001707 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001708 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001709 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001710 if (PyList_Reverse(list) < 0)
1711 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001712 return list;
1713
1714 onError:
1715 Py_DECREF(list);
1716 return NULL;
1717}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001718
1719PyDoc_STRVAR(rsplit__doc__,
1720"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1721\n\
1722Return a list of the words in the string S, using sep as the\n\
1723delimiter string, starting at the end of the string and working\n\
1724to the front. If maxsplit is given, at most maxsplit splits are\n\
1725done. If sep is not specified or is None, any whitespace string\n\
1726is a separator.");
1727
1728static PyObject *
1729string_rsplit(PyStringObject *self, PyObject *args)
1730{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001731 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001732 Py_ssize_t maxsplit = -1, count=0;
Skip Montanaro26015492007-12-08 15:33:24 +00001733 const char *s, *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001734 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001735
Martin v. Löwis9c830762006-04-13 08:37:17 +00001736 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001737 return NULL;
1738 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001739 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001740 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001741 return rsplit_whitespace(self, len, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001742 if (PyString_Check(subobj)) {
1743 sub = PyString_AS_STRING(subobj);
1744 n = PyString_GET_SIZE(subobj);
1745 }
1746#ifdef Py_USING_UNICODE
1747 else if (PyUnicode_Check(subobj))
1748 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1749#endif
1750 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1751 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001752
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001753 if (n == 0) {
1754 PyErr_SetString(PyExc_ValueError, "empty separator");
1755 return NULL;
1756 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001757 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001758 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001759
Andrew Dalke525eab32006-05-26 14:00:45 +00001760 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001761 if (list == NULL)
1762 return NULL;
1763
1764 j = len;
1765 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001766
Skip Montanaro26015492007-12-08 15:33:24 +00001767 s = PyString_AS_STRING(self);
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001768 while ( (i >= 0) && (maxsplit-- > 0) ) {
1769 for (; i>=0; i--) {
1770 if (Py_STRING_MATCH(s, i, sub, n)) {
1771 SPLIT_ADD(s, i + n, j);
1772 j = i;
1773 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001774 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001775 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001776 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001777 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001778 SPLIT_ADD(s, 0, j);
1779 FIX_PREALLOC_SIZE(list);
1780 if (PyList_Reverse(list) < 0)
1781 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001782 return list;
1783
Andrew Dalke525eab32006-05-26 14:00:45 +00001784onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001785 Py_DECREF(list);
1786 return NULL;
1787}
1788
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791"S.join(sequence) -> string\n\
1792\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001793Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001794sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795
1796static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001797string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798{
1799 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001800 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001803 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001804 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001805 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001806 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807
Tim Peters19fe14e2001-01-19 03:03:47 +00001808 seq = PySequence_Fast(orig, "");
1809 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001810 return NULL;
1811 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001812
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001813 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001814 if (seqlen == 0) {
1815 Py_DECREF(seq);
1816 return PyString_FromString("");
1817 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001819 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001820 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1821 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001822 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001823 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001824 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001826
Raymond Hettinger674f2412004-08-23 23:23:54 +00001827 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001828 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001829 * Do a pre-pass to figure out the total amount of space we'll
1830 * need (sz), see whether any argument is absurd, and defer to
1831 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001832 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001833 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001834 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001835 item = PySequence_Fast_GET_ITEM(seq, i);
1836 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001837#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001838 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001839 /* Defer to Unicode join.
1840 * CAUTION: There's no gurantee that the
1841 * original sequence can be iterated over
1842 * again, so we must pass seq here.
1843 */
1844 PyObject *result;
1845 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001846 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001847 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001848 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001849#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001850 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001851 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001852 " %.80s found",
Christian Heimese93237d2007-12-19 02:37:44 +00001853 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001854 Py_DECREF(seq);
1855 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001856 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001857 sz += PyString_GET_SIZE(item);
1858 if (i != 0)
1859 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001860 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001861 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001862 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001863 Py_DECREF(seq);
1864 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001866 }
1867
1868 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001869 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001870 if (res == NULL) {
1871 Py_DECREF(seq);
1872 return NULL;
1873 }
1874
1875 /* Catenate everything. */
1876 p = PyString_AS_STRING(res);
1877 for (i = 0; i < seqlen; ++i) {
1878 size_t n;
1879 item = PySequence_Fast_GET_ITEM(seq, i);
1880 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001881 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001882 p += n;
1883 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001884 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001885 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001886 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001887 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001888
Jeremy Hylton49048292000-07-11 03:28:17 +00001889 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891}
1892
Tim Peters52e155e2001-06-16 05:42:57 +00001893PyObject *
1894_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001895{
Tim Petersa7259592001-06-16 05:11:17 +00001896 assert(sep != NULL && PyString_Check(sep));
1897 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001898 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001899}
1900
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001901Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001902string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001903{
1904 if (*end > len)
1905 *end = len;
1906 else if (*end < 0)
1907 *end += len;
1908 if (*end < 0)
1909 *end = 0;
1910 if (*start < 0)
1911 *start += len;
1912 if (*start < 0)
1913 *start = 0;
1914}
1915
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001916Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001917string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001919 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001920 const char *sub;
1921 Py_ssize_t sub_len;
1922 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Facundo Batista57d56692007-11-16 18:04:14 +00001923 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924
Facundo Batista57d56692007-11-16 18:04:14 +00001925 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1926 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001927 return -2;
Facundo Batista57d56692007-11-16 18:04:14 +00001928 /* To support None in "start" and "end" arguments, meaning
1929 the same as if they were not passed.
1930 */
1931 if (obj_start != Py_None)
1932 if (!_PyEval_SliceIndex(obj_start, &start))
1933 return -2;
1934 if (obj_end != Py_None)
1935 if (!_PyEval_SliceIndex(obj_end, &end))
1936 return -2;
1937
Guido van Rossum4c08d552000-03-10 22:55:18 +00001938 if (PyString_Check(subobj)) {
1939 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001940 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001941 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001942#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001943 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001944 return PyUnicode_Find(
1945 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001946#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001947 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001948 /* XXX - the "expected a character buffer object" is pretty
1949 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950 return -2;
1951
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001952 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001953 return stringlib_find_slice(
1954 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1955 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001956 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001957 return stringlib_rfind_slice(
1958 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1959 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960}
1961
1962
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001963PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964"S.find(sub [,start [,end]]) -> int\n\
1965\n\
1966Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001967such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968arguments start and end are interpreted as in slice notation.\n\
1969\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001970Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971
1972static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001973string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001975 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976 if (result == -2)
1977 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001978 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979}
1980
1981
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001982PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983"S.index(sub [,start [,end]]) -> int\n\
1984\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001985Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986
1987static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001988string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001990 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991 if (result == -2)
1992 return NULL;
1993 if (result == -1) {
1994 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001995 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996 return NULL;
1997 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001998 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999}
2000
2001
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002002PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003"S.rfind(sub [,start [,end]]) -> int\n\
2004\n\
2005Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00002006such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007arguments start and end are interpreted as in slice notation.\n\
2008\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002009Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002010
2011static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002012string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002014 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015 if (result == -2)
2016 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002017 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018}
2019
2020
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002021PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022"S.rindex(sub [,start [,end]]) -> int\n\
2023\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002024Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025
2026static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002027string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002029 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002030 if (result == -2)
2031 return NULL;
2032 if (result == -1) {
2033 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002034 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035 return NULL;
2036 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002037 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038}
2039
2040
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002041Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002042do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2043{
2044 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002045 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002046 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002047 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2048 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002049
2050 i = 0;
2051 if (striptype != RIGHTSTRIP) {
2052 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2053 i++;
2054 }
2055 }
2056
2057 j = len;
2058 if (striptype != LEFTSTRIP) {
2059 do {
2060 j--;
2061 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2062 j++;
2063 }
2064
2065 if (i == 0 && j == len && PyString_CheckExact(self)) {
2066 Py_INCREF(self);
2067 return (PyObject*)self;
2068 }
2069 else
2070 return PyString_FromStringAndSize(s+i, j-i);
2071}
2072
2073
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002074Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002075do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076{
2077 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002078 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080 i = 0;
2081 if (striptype != RIGHTSTRIP) {
2082 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2083 i++;
2084 }
2085 }
2086
2087 j = len;
2088 if (striptype != LEFTSTRIP) {
2089 do {
2090 j--;
2091 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2092 j++;
2093 }
2094
Tim Peters8fa5dd02001-09-12 02:18:30 +00002095 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096 Py_INCREF(self);
2097 return (PyObject*)self;
2098 }
2099 else
2100 return PyString_FromStringAndSize(s+i, j-i);
2101}
2102
2103
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002104Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002105do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2106{
2107 PyObject *sep = NULL;
2108
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002109 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002110 return NULL;
2111
2112 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002113 if (PyString_Check(sep))
2114 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002115#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002116 else if (PyUnicode_Check(sep)) {
2117 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2118 PyObject *res;
2119 if (uniself==NULL)
2120 return NULL;
2121 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2122 striptype, sep);
2123 Py_DECREF(uniself);
2124 return res;
2125 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002126#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002127 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002128#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002129 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002130#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002131 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002132#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002133 STRIPNAME(striptype));
2134 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002135 }
2136
2137 return do_strip(self, striptype);
2138}
2139
2140
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002141PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002142"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143\n\
2144Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002145whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002146If chars is given and not None, remove characters in chars instead.\n\
2147If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148
2149static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002150string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002152 if (PyTuple_GET_SIZE(args) == 0)
2153 return do_strip(self, BOTHSTRIP); /* Common case */
2154 else
2155 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156}
2157
2158
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002159PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002160"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002162Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002163If chars is given and not None, remove characters in chars instead.\n\
2164If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165
2166static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002167string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002169 if (PyTuple_GET_SIZE(args) == 0)
2170 return do_strip(self, LEFTSTRIP); /* Common case */
2171 else
2172 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173}
2174
2175
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002176PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002177"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002179Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002180If chars is given and not None, remove characters in chars instead.\n\
2181If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182
2183static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002184string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002186 if (PyTuple_GET_SIZE(args) == 0)
2187 return do_strip(self, RIGHTSTRIP); /* Common case */
2188 else
2189 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190}
2191
2192
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002193PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194"S.lower() -> string\n\
2195\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002196Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002198/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2199#ifndef _tolower
2200#define _tolower tolower
2201#endif
2202
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002204string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002206 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002207 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002208 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002210 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002211 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002213
2214 s = PyString_AS_STRING(newobj);
2215
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002216 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002217
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002219 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002220 if (isupper(c))
2221 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002223
Anthony Baxtera6286212006-04-11 07:42:36 +00002224 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225}
2226
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002227PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228"S.upper() -> string\n\
2229\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002230Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002232#ifndef _toupper
2233#define _toupper toupper
2234#endif
2235
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002237string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002239 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002240 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002241 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002242
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002243 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002244 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002246
2247 s = PyString_AS_STRING(newobj);
2248
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002249 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002250
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002252 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002253 if (islower(c))
2254 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002256
Anthony Baxtera6286212006-04-11 07:42:36 +00002257 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258}
2259
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002260PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261"S.title() -> string\n\
2262\n\
2263Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002264characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265
2266static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002267string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268{
2269 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002270 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002271 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002272 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273
Anthony Baxtera6286212006-04-11 07:42:36 +00002274 newobj = PyString_FromStringAndSize(NULL, n);
2275 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002277 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278 for (i = 0; i < n; i++) {
2279 int c = Py_CHARMASK(*s++);
2280 if (islower(c)) {
2281 if (!previous_is_cased)
2282 c = toupper(c);
2283 previous_is_cased = 1;
2284 } else if (isupper(c)) {
2285 if (previous_is_cased)
2286 c = tolower(c);
2287 previous_is_cased = 1;
2288 } else
2289 previous_is_cased = 0;
2290 *s_new++ = c;
2291 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002292 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293}
2294
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002295PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296"S.capitalize() -> string\n\
2297\n\
2298Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002299capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300
2301static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002302string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002303{
2304 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002305 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002306 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307
Anthony Baxtera6286212006-04-11 07:42:36 +00002308 newobj = PyString_FromStringAndSize(NULL, n);
2309 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002311 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312 if (0 < n) {
2313 int c = Py_CHARMASK(*s++);
2314 if (islower(c))
2315 *s_new = toupper(c);
2316 else
2317 *s_new = c;
2318 s_new++;
2319 }
2320 for (i = 1; i < n; i++) {
2321 int c = Py_CHARMASK(*s++);
2322 if (isupper(c))
2323 *s_new = tolower(c);
2324 else
2325 *s_new = c;
2326 s_new++;
2327 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002328 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002329}
2330
2331
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002332PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333"S.count(sub[, start[, end]]) -> int\n\
2334\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002335Return the number of non-overlapping occurrences of substring sub in\n\
2336string S[start:end]. Optional arguments start and end are interpreted\n\
2337as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338
2339static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002340string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002341{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002342 PyObject *sub_obj;
2343 const char *str = PyString_AS_STRING(self), *sub;
2344 Py_ssize_t sub_len;
2345 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002347 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2348 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002349 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002350
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002351 if (PyString_Check(sub_obj)) {
2352 sub = PyString_AS_STRING(sub_obj);
2353 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002354 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002355#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002356 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002357 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002358 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002359 if (count == -1)
2360 return NULL;
2361 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002362 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002363 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002364#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002365 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002366 return NULL;
2367
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002368 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002369
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002370 return PyInt_FromSsize_t(
2371 stringlib_count(str + start, end - start, sub, sub_len)
2372 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373}
2374
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002375PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376"S.swapcase() -> string\n\
2377\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002379converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380
2381static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002382string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383{
2384 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002385 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002386 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002387
Anthony Baxtera6286212006-04-11 07:42:36 +00002388 newobj = PyString_FromStringAndSize(NULL, n);
2389 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002390 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002391 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392 for (i = 0; i < n; i++) {
2393 int c = Py_CHARMASK(*s++);
2394 if (islower(c)) {
2395 *s_new = toupper(c);
2396 }
2397 else if (isupper(c)) {
2398 *s_new = tolower(c);
2399 }
2400 else
2401 *s_new = c;
2402 s_new++;
2403 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002404 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002405}
2406
2407
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002408PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409"S.translate(table [,deletechars]) -> string\n\
2410\n\
2411Return a copy of the string S, where all characters occurring\n\
2412in the optional argument deletechars are removed, and the\n\
2413remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002414translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415
2416static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002417string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002419 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002420 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002421 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002423 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002424 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425 PyObject *result;
2426 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002429 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002430 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002431 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432
2433 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002434 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435 tablen = PyString_GET_SIZE(tableobj);
2436 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002437 else if (tableobj == Py_None) {
2438 table = NULL;
2439 tablen = 256;
2440 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002441#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002443 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 parameter; instead a mapping to None will cause characters
2445 to be deleted. */
2446 if (delobj != NULL) {
2447 PyErr_SetString(PyExc_TypeError,
2448 "deletions are implemented differently for unicode");
2449 return NULL;
2450 }
2451 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2452 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002453#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002454 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002455 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002456
Martin v. Löwis00b61272002-12-12 20:03:19 +00002457 if (tablen != 256) {
2458 PyErr_SetString(PyExc_ValueError,
2459 "translation table must be 256 characters long");
2460 return NULL;
2461 }
2462
Guido van Rossum4c08d552000-03-10 22:55:18 +00002463 if (delobj != NULL) {
2464 if (PyString_Check(delobj)) {
2465 del_table = PyString_AS_STRING(delobj);
2466 dellen = PyString_GET_SIZE(delobj);
2467 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002468#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002469 else if (PyUnicode_Check(delobj)) {
2470 PyErr_SetString(PyExc_TypeError,
2471 "deletions are implemented differently for unicode");
2472 return NULL;
2473 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002474#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002475 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2476 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002477 }
2478 else {
2479 del_table = NULL;
2480 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002481 }
2482
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002483 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002484 result = PyString_FromStringAndSize((char *)NULL, inlen);
2485 if (result == NULL)
2486 return NULL;
2487 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002488 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002489
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002490 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002491 /* If no deletions are required, use faster code */
2492 for (i = inlen; --i >= 0; ) {
2493 c = Py_CHARMASK(*input++);
2494 if (Py_CHARMASK((*output++ = table[c])) != c)
2495 changed = 1;
2496 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002497 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002498 return result;
2499 Py_DECREF(result);
2500 Py_INCREF(input_obj);
2501 return input_obj;
2502 }
2503
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002504 if (table == NULL) {
2505 for (i = 0; i < 256; i++)
2506 trans_table[i] = Py_CHARMASK(i);
2507 } else {
2508 for (i = 0; i < 256; i++)
2509 trans_table[i] = Py_CHARMASK(table[i]);
2510 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002511
2512 for (i = 0; i < dellen; i++)
2513 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2514
2515 for (i = inlen; --i >= 0; ) {
2516 c = Py_CHARMASK(*input++);
2517 if (trans_table[c] != -1)
2518 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2519 continue;
2520 changed = 1;
2521 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002522 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002523 Py_DECREF(result);
2524 Py_INCREF(input_obj);
2525 return input_obj;
2526 }
2527 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002528 if (inlen > 0)
2529 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002530 return result;
2531}
2532
2533
/* Direction values; REVERSE is parenthesized so the macro expands to a
   single primary expression wherever it is used. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a char * to the first byte equal to c among the first target_len
   bytes of target, or NULL when memchr() finds none. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2541
2542/* String ops must return a string. */
2543/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002544Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002545return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002546{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002547 if (PyString_CheckExact(self)) {
2548 Py_INCREF(self);
2549 return self;
2550 }
2551 return (PyStringObject *)PyString_FromStringAndSize(
2552 PyString_AS_STRING(self),
2553 PyString_GET_SIZE(self));
2554}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002555
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002556Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002557countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002558{
2559 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002560 const char *start=target;
2561 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002562
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563 while ( (start=findchar(start, end-start, c)) != NULL ) {
2564 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002565 if (count >= maxcount)
2566 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002567 start += 1;
2568 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002569 return count;
2570}
2571
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002572Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002573findstring(const char *target, Py_ssize_t target_len,
2574 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002575 Py_ssize_t start,
2576 Py_ssize_t end,
2577 int direction)
2578{
2579 if (start < 0) {
2580 start += target_len;
2581 if (start < 0)
2582 start = 0;
2583 }
2584 if (end > target_len) {
2585 end = target_len;
2586 } else if (end < 0) {
2587 end += target_len;
2588 if (end < 0)
2589 end = 0;
2590 }
2591
2592 /* zero-length substrings always match at the first attempt */
2593 if (pattern_len == 0)
2594 return (direction > 0) ? start : end;
2595
2596 end -= pattern_len;
2597
2598 if (direction < 0) {
2599 for (; end >= start; end--)
2600 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2601 return end;
2602 } else {
2603 for (; start <= end; start++)
2604 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2605 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002606 }
2607 return -1;
2608}
2609
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002610Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002611countstring(const char *target, Py_ssize_t target_len,
2612 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002613 Py_ssize_t start,
2614 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002615 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002616{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002617 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002618
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002619 if (start < 0) {
2620 start += target_len;
2621 if (start < 0)
2622 start = 0;
2623 }
2624 if (end > target_len) {
2625 end = target_len;
2626 } else if (end < 0) {
2627 end += target_len;
2628 if (end < 0)
2629 end = 0;
2630 }
2631
2632 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002633 if (pattern_len == 0 || maxcount == 0) {
2634 if (target_len+1 < maxcount)
2635 return target_len+1;
2636 return maxcount;
2637 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002638
2639 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002640 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002641 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002642 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2643 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002644 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002645 end -= pattern_len-1;
2646 }
2647 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002648 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002649 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2650 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002651 if (--maxcount <= 0)
2652 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002653 start += pattern_len-1;
2654 }
2655 }
2656 return count;
2657}
2658
2659
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002660/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002661
2662/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002663Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002664replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002665 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002666 Py_ssize_t maxcount)
2667{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002668 char *self_s, *result_s;
2669 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002670 Py_ssize_t count, i, product;
2671 PyStringObject *result;
2672
2673 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002674
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002675 /* 1 at the end plus 1 after every character */
2676 count = self_len+1;
2677 if (maxcount < count)
2678 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002679
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002680 /* Check for overflow */
2681 /* result_len = count * to_len + self_len; */
2682 product = count * to_len;
2683 if (product / to_len != count) {
2684 PyErr_SetString(PyExc_OverflowError,
2685 "replace string is too long");
2686 return NULL;
2687 }
2688 result_len = product + self_len;
2689 if (result_len < 0) {
2690 PyErr_SetString(PyExc_OverflowError,
2691 "replace string is too long");
2692 return NULL;
2693 }
2694
2695 if (! (result = (PyStringObject *)
2696 PyString_FromStringAndSize(NULL, result_len)) )
2697 return NULL;
2698
2699 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002700 result_s = PyString_AS_STRING(result);
2701
2702 /* TODO: special case single character, which doesn't need memcpy */
2703
2704 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002705 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002706 result_s += to_len;
2707 count -= 1;
2708
2709 for (i=0; i<count; i++) {
2710 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002711 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002712 result_s += to_len;
2713 }
2714
2715 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002716 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002717
2718 return result;
2719}
2720
2721/* Special case for deleting a single character */
2722/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002723Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002724replace_delete_single_character(PyStringObject *self,
2725 char from_c, Py_ssize_t maxcount)
2726{
2727 char *self_s, *result_s;
2728 char *start, *next, *end;
2729 Py_ssize_t self_len, result_len;
2730 Py_ssize_t count;
2731 PyStringObject *result;
2732
2733 self_len = PyString_GET_SIZE(self);
2734 self_s = PyString_AS_STRING(self);
2735
Andrew Dalke51324072006-05-26 20:25:22 +00002736 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002737 if (count == 0) {
2738 return return_self(self);
2739 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002740
2741 result_len = self_len - count; /* from_len == 1 */
2742 assert(result_len>=0);
2743
2744 if ( (result = (PyStringObject *)
2745 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2746 return NULL;
2747 result_s = PyString_AS_STRING(result);
2748
2749 start = self_s;
2750 end = self_s + self_len;
2751 while (count-- > 0) {
2752 next = findchar(start, end-start, from_c);
2753 if (next == NULL)
2754 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002755 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002756 result_s += (next-start);
2757 start = next+1;
2758 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002759 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002760
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002761 return result;
2762}
2763
2764/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2765
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002766Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002767replace_delete_substring(PyStringObject *self,
2768 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002769 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002770 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002772 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002773 Py_ssize_t count, offset;
2774 PyStringObject *result;
2775
2776 self_len = PyString_GET_SIZE(self);
2777 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002778
2779 count = countstring(self_s, self_len,
2780 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002781 0, self_len, 1,
2782 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783
2784 if (count == 0) {
2785 /* no matches */
2786 return return_self(self);
2787 }
2788
2789 result_len = self_len - (count * from_len);
2790 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002791
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002792 if ( (result = (PyStringObject *)
2793 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2794 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002795
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002796 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002797
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002798 start = self_s;
2799 end = self_s + self_len;
2800 while (count-- > 0) {
2801 offset = findstring(start, end-start,
2802 from_s, from_len,
2803 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002804 if (offset == -1)
2805 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002806 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002807
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002808 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002809
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810 result_s += (next-start);
2811 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002812 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002813 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002815}
2816
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002817/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002818Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819replace_single_character_in_place(PyStringObject *self,
2820 char from_c, char to_c,
2821 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002822{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002823 char *self_s, *result_s, *start, *end, *next;
2824 Py_ssize_t self_len;
2825 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002826
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 /* The result string will be the same size */
2828 self_s = PyString_AS_STRING(self);
2829 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002830
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002831 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002832
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002833 if (next == NULL) {
2834 /* No matches; return the original string */
2835 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002836 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002837
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002838 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002839 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002840 if (result == NULL)
2841 return NULL;
2842 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002843 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002844
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002845 /* change everything in-place, starting with this one */
2846 start = result_s + (next-self_s);
2847 *start = to_c;
2848 start++;
2849 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002850
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002851 while (--maxcount > 0) {
2852 next = findchar(start, end-start, from_c);
2853 if (next == NULL)
2854 break;
2855 *next = to_c;
2856 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002857 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002858
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002859 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002860}
2861
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002862/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002863Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002864replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002865 const char *from_s, Py_ssize_t from_len,
2866 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867 Py_ssize_t maxcount)
2868{
2869 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002870 char *self_s;
2871 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002872 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002873
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002874 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002875
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002876 self_s = PyString_AS_STRING(self);
2877 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002878
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002879 offset = findstring(self_s, self_len,
2880 from_s, from_len,
2881 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002882 if (offset == -1) {
2883 /* No matches; return the original string */
2884 return return_self(self);
2885 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002886
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002887 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002888 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002889 if (result == NULL)
2890 return NULL;
2891 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002892 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002893
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002894 /* change everything in-place, starting with this one */
2895 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002896 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002897 start += from_len;
2898 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002899
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002900 while ( --maxcount > 0) {
2901 offset = findstring(start, end-start,
2902 from_s, from_len,
2903 0, end-start, FORWARD);
2904 if (offset==-1)
2905 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002906 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002907 start += offset+from_len;
2908 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002909
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002910 return result;
2911}
2912
2913/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002914Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002915replace_single_character(PyStringObject *self,
2916 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002917 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002918 Py_ssize_t maxcount)
2919{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002920 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002921 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002922 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002923 Py_ssize_t count, product;
2924 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002925
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002926 self_s = PyString_AS_STRING(self);
2927 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002928
Andrew Dalke51324072006-05-26 20:25:22 +00002929 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002930 if (count == 0) {
2931 /* no matches, return unchanged */
2932 return return_self(self);
2933 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002934
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002935 /* use the difference between current and new, hence the "-1" */
2936 /* result_len = self_len + count * (to_len-1) */
2937 product = count * (to_len-1);
2938 if (product / (to_len-1) != count) {
2939 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2940 return NULL;
2941 }
2942 result_len = self_len + product;
2943 if (result_len < 0) {
2944 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2945 return NULL;
2946 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002947
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002948 if ( (result = (PyStringObject *)
2949 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2950 return NULL;
2951 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002952
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002953 start = self_s;
2954 end = self_s + self_len;
2955 while (count-- > 0) {
2956 next = findchar(start, end-start, from_c);
2957 if (next == NULL)
2958 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002959
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002960 if (next == start) {
2961 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002962 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002963 result_s += to_len;
2964 start += 1;
2965 } else {
2966 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002967 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002968 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002969 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002970 result_s += to_len;
2971 start = next+1;
2972 }
2973 }
2974 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002975 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002976
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002977 return result;
2978}
2979
2980/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002981Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002982replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002983 const char *from_s, Py_ssize_t from_len,
2984 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002985 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002986 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002987 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002988 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002989 Py_ssize_t count, offset, product;
2990 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002991
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002992 self_s = PyString_AS_STRING(self);
2993 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002994
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002995 count = countstring(self_s, self_len,
2996 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002997 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002998 if (count == 0) {
2999 /* no matches, return unchanged */
3000 return return_self(self);
3001 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003002
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003003 /* Check for overflow */
3004 /* result_len = self_len + count * (to_len-from_len) */
3005 product = count * (to_len-from_len);
3006 if (product / (to_len-from_len) != count) {
3007 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3008 return NULL;
3009 }
3010 result_len = self_len + product;
3011 if (result_len < 0) {
3012 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3013 return NULL;
3014 }
Neal Norwitza7edb112006-07-30 06:59:13 +00003015
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003016 if ( (result = (PyStringObject *)
3017 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3018 return NULL;
3019 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00003020
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003021 start = self_s;
3022 end = self_s + self_len;
3023 while (count-- > 0) {
3024 offset = findstring(start, end-start,
3025 from_s, from_len,
3026 0, end-start, FORWARD);
3027 if (offset == -1)
3028 break;
3029 next = start+offset;
3030 if (next == start) {
3031 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003032 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003033 result_s += to_len;
3034 start += from_len;
3035 } else {
3036 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003037 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003038 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003039 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003040 result_s += to_len;
3041 start = next+from_len;
3042 }
3043 }
3044 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003045 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00003046
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003047 return result;
3048}
3049
3050
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003051Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003052replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003053 const char *from_s, Py_ssize_t from_len,
3054 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003055 Py_ssize_t maxcount)
3056{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057 if (maxcount < 0) {
3058 maxcount = PY_SSIZE_T_MAX;
3059 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3060 /* nothing to do; return the original string */
3061 return return_self(self);
3062 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003063
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003064 if (maxcount == 0 ||
3065 (from_len == 0 && to_len == 0)) {
3066 /* nothing to do; return the original string */
3067 return return_self(self);
3068 }
3069
3070 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003071
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003072 if (from_len == 0) {
3073 /* insert the 'to' string everywhere. */
3074 /* >>> "Python".replace("", ".") */
3075 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003076 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003077 }
3078
3079 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3080 /* point for an empty self string to generate a non-empty string */
3081 /* Special case so the remaining code always gets a non-empty string */
3082 if (PyString_GET_SIZE(self) == 0) {
3083 return return_self(self);
3084 }
3085
3086 if (to_len == 0) {
3087 /* delete all occurances of 'from' string */
3088 if (from_len == 1) {
3089 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003090 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003091 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003092 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003093 }
3094 }
3095
3096 /* Handle special case where both strings have the same length */
3097
3098 if (from_len == to_len) {
3099 if (from_len == 1) {
3100 return replace_single_character_in_place(
3101 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003102 from_s[0],
3103 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003104 maxcount);
3105 } else {
3106 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003107 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003108 }
3109 }
3110
3111 /* Otherwise use the more generic algorithms */
3112 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003113 return replace_single_character(self, from_s[0],
3114 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003115 } else {
3116 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003117 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003118 }
3119}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003120
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003121PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003122"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003123\n\
3124Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003125old replaced by new. If the optional argument count is\n\
3126given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003127
3128static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003129string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003130{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003131 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003132 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003133 const char *from_s, *to_s;
3134 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003135
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003136 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003138
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003139 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003140 from_s = PyString_AS_STRING(from);
3141 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003142 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003143#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003144 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003145 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003146 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003147#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003148 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003149 return NULL;
3150
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003151 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003152 to_s = PyString_AS_STRING(to);
3153 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003155#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003156 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003157 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003158 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003159#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003160 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 return NULL;
3162
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003163 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003164 from_s, from_len,
3165 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003166}
3167
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003168/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003169
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003170/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003171 * against substr, using the start and end arguments. Returns
3172 * -1 on error, 0 if not found and 1 if found.
3173 */
3174Py_LOCAL(int)
3175_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3176 Py_ssize_t end, int direction)
3177{
3178 Py_ssize_t len = PyString_GET_SIZE(self);
3179 Py_ssize_t slen;
3180 const char* sub;
3181 const char* str;
3182
3183 if (PyString_Check(substr)) {
3184 sub = PyString_AS_STRING(substr);
3185 slen = PyString_GET_SIZE(substr);
3186 }
3187#ifdef Py_USING_UNICODE
3188 else if (PyUnicode_Check(substr))
3189 return PyUnicode_Tailmatch((PyObject *)self,
3190 substr, start, end, direction);
3191#endif
3192 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3193 return -1;
3194 str = PyString_AS_STRING(self);
3195
3196 string_adjust_indices(&start, &end, len);
3197
3198 if (direction < 0) {
3199 /* startswith */
3200 if (start+slen > len)
3201 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003202 } else {
3203 /* endswith */
3204 if (end-start < slen || start > len)
3205 return 0;
3206
3207 if (end-slen > start)
3208 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003209 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003210 if (end-start >= slen)
3211 return ! memcmp(str+start, sub, slen);
3212 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003213}
3214
3215
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003216PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003217"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003218\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003219Return True if S starts with the specified prefix, False otherwise.\n\
3220With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003221With optional end, stop comparing S at that position.\n\
3222prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003223
3224static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003225string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003226{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003227 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003228 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003229 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003230 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003231
Guido van Rossumc6821402000-05-08 14:08:05 +00003232 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3233 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003234 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003235 if (PyTuple_Check(subobj)) {
3236 Py_ssize_t i;
3237 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3238 result = _string_tailmatch(self,
3239 PyTuple_GET_ITEM(subobj, i),
3240 start, end, -1);
3241 if (result == -1)
3242 return NULL;
3243 else if (result) {
3244 Py_RETURN_TRUE;
3245 }
3246 }
3247 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003248 }
Georg Brandl24250812006-06-09 18:45:48 +00003249 result = _string_tailmatch(self, subobj, start, end, -1);
3250 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003251 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003252 else
Georg Brandl24250812006-06-09 18:45:48 +00003253 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003254}
3255
3256
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003257PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003258"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003259\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003260Return True if S ends with the specified suffix, False otherwise.\n\
3261With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003262With optional end, stop comparing S at that position.\n\
3263suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003264
3265static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003266string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003267{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003268 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003269 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003270 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003271 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003272
Guido van Rossumc6821402000-05-08 14:08:05 +00003273 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3274 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003275 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003276 if (PyTuple_Check(subobj)) {
3277 Py_ssize_t i;
3278 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3279 result = _string_tailmatch(self,
3280 PyTuple_GET_ITEM(subobj, i),
3281 start, end, +1);
3282 if (result == -1)
3283 return NULL;
3284 else if (result) {
3285 Py_RETURN_TRUE;
3286 }
3287 }
3288 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003289 }
Georg Brandl24250812006-06-09 18:45:48 +00003290 result = _string_tailmatch(self, subobj, start, end, +1);
3291 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003292 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003293 else
Georg Brandl24250812006-06-09 18:45:48 +00003294 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003295}
3296
3297
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003298PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003299"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003300\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003301Encodes S using the codec registered for encoding. encoding defaults\n\
3302to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003303handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003304a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3305'xmlcharrefreplace' as well as any other name registered with\n\
3306codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003307
3308static PyObject *
3309string_encode(PyStringObject *self, PyObject *args)
3310{
3311 char *encoding = NULL;
3312 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003313 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003314
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003315 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3316 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003317 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003318 if (v == NULL)
3319 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003320 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3321 PyErr_Format(PyExc_TypeError,
3322 "encoder did not return a string/unicode object "
3323 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003324 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003325 Py_DECREF(v);
3326 return NULL;
3327 }
3328 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003329
3330 onError:
3331 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003332}
3333
3334
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003335PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003336"S.decode([encoding[,errors]]) -> object\n\
3337\n\
3338Decodes S using the codec registered for encoding. encoding defaults\n\
3339to the default encoding. errors may be given to set a different error\n\
3340handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003341a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3342as well as any other name registerd with codecs.register_error that is\n\
3343able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003344
3345static PyObject *
3346string_decode(PyStringObject *self, PyObject *args)
3347{
3348 char *encoding = NULL;
3349 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003350 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003351
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003352 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3353 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003354 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003355 if (v == NULL)
3356 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003357 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3358 PyErr_Format(PyExc_TypeError,
3359 "decoder did not return a string/unicode object "
3360 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003361 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003362 Py_DECREF(v);
3363 return NULL;
3364 }
3365 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003366
3367 onError:
3368 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003369}
3370
3371
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003372PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003373"S.expandtabs([tabsize]) -> string\n\
3374\n\
3375Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003376If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003377
3378static PyObject*
3379string_expandtabs(PyStringObject *self, PyObject *args)
3380{
Guido van Rossum5bdff602008-03-11 21:18:06 +00003381 const char *e, *p, *qe;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003382 char *q;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003383 Py_ssize_t i, j, incr;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003384 PyObject *u;
3385 int tabsize = 8;
3386
3387 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3388 return NULL;
3389
Thomas Wouters7e474022000-07-16 12:04:32 +00003390 /* First pass: determine size of output string */
Guido van Rossum5bdff602008-03-11 21:18:06 +00003391 i = 0; /* chars up to and including most recent \n or \r */
3392 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3393 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003394 for (p = PyString_AS_STRING(self); p < e; p++)
3395 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003396 if (tabsize > 0) {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003397 incr = tabsize - (j % tabsize);
3398 if (j > PY_SSIZE_T_MAX - incr)
3399 goto overflow1;
3400 j += incr;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003401 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003402 }
3403 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003404 if (j > PY_SSIZE_T_MAX - 1)
3405 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406 j++;
3407 if (*p == '\n' || *p == '\r') {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003408 if (i > PY_SSIZE_T_MAX - j)
3409 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003410 i += j;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003411 j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412 }
3413 }
3414
Guido van Rossum5bdff602008-03-11 21:18:06 +00003415 if (i > PY_SSIZE_T_MAX - j)
3416 goto overflow1;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003417
Guido van Rossum4c08d552000-03-10 22:55:18 +00003418 /* Second pass: create output string and fill it */
3419 u = PyString_FromStringAndSize(NULL, i + j);
3420 if (!u)
3421 return NULL;
3422
Guido van Rossum5bdff602008-03-11 21:18:06 +00003423 j = 0; /* same as in first pass */
3424 q = PyString_AS_STRING(u); /* next output char */
3425 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003426
3427 for (p = PyString_AS_STRING(self); p < e; p++)
3428 if (*p == '\t') {
3429 if (tabsize > 0) {
3430 i = tabsize - (j % tabsize);
3431 j += i;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003432 while (i--) {
3433 if (q >= qe)
3434 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003435 *q++ = ' ';
Guido van Rossum5bdff602008-03-11 21:18:06 +00003436 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003437 }
3438 }
3439 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003440 if (q >= qe)
3441 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003442 *q++ = *p;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003443 j++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003444 if (*p == '\n' || *p == '\r')
3445 j = 0;
3446 }
3447
3448 return u;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003449
3450 overflow2:
3451 Py_DECREF(u);
3452 overflow1:
3453 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3454 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003455}
3456
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003457Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003458pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003459{
3460 PyObject *u;
3461
3462 if (left < 0)
3463 left = 0;
3464 if (right < 0)
3465 right = 0;
3466
Tim Peters8fa5dd02001-09-12 02:18:30 +00003467 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003468 Py_INCREF(self);
3469 return (PyObject *)self;
3470 }
3471
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003472 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003473 left + PyString_GET_SIZE(self) + right);
3474 if (u) {
3475 if (left)
3476 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003477 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003478 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003479 PyString_GET_SIZE(self));
3480 if (right)
3481 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3482 fill, right);
3483 }
3484
3485 return u;
3486}
3487
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003488PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003489"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003490"\n"
3491"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003492"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003493
3494static PyObject *
3495string_ljust(PyStringObject *self, PyObject *args)
3496{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003497 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003498 char fillchar = ' ';
3499
Thomas Wouters4abb3662006-04-19 14:50:15 +00003500 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003501 return NULL;
3502
Tim Peters8fa5dd02001-09-12 02:18:30 +00003503 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003504 Py_INCREF(self);
3505 return (PyObject*) self;
3506 }
3507
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003508 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003509}
3510
3511
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003512PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003513"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003514"\n"
3515"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003516"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003517
3518static PyObject *
3519string_rjust(PyStringObject *self, PyObject *args)
3520{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003521 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003522 char fillchar = ' ';
3523
Thomas Wouters4abb3662006-04-19 14:50:15 +00003524 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003525 return NULL;
3526
Tim Peters8fa5dd02001-09-12 02:18:30 +00003527 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003528 Py_INCREF(self);
3529 return (PyObject*) self;
3530 }
3531
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003532 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003533}
3534
3535
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003536PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003537"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003538"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003539"Return S centered in a string of length width. Padding is\n"
3540"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003541
3542static PyObject *
3543string_center(PyStringObject *self, PyObject *args)
3544{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003545 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003546 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003547 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003548
Thomas Wouters4abb3662006-04-19 14:50:15 +00003549 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003550 return NULL;
3551
Tim Peters8fa5dd02001-09-12 02:18:30 +00003552 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003553 Py_INCREF(self);
3554 return (PyObject*) self;
3555 }
3556
3557 marg = width - PyString_GET_SIZE(self);
3558 left = marg / 2 + (marg & width & 1);
3559
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003560 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561}
3562
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003563PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003564"S.zfill(width) -> string\n"
3565"\n"
3566"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003567"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003568
3569static PyObject *
3570string_zfill(PyStringObject *self, PyObject *args)
3571{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003572 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003573 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003574 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003575 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003576
Thomas Wouters4abb3662006-04-19 14:50:15 +00003577 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003578 return NULL;
3579
3580 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003581 if (PyString_CheckExact(self)) {
3582 Py_INCREF(self);
3583 return (PyObject*) self;
3584 }
3585 else
3586 return PyString_FromStringAndSize(
3587 PyString_AS_STRING(self),
3588 PyString_GET_SIZE(self)
3589 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003590 }
3591
3592 fill = width - PyString_GET_SIZE(self);
3593
3594 s = pad(self, fill, 0, '0');
3595
3596 if (s == NULL)
3597 return NULL;
3598
3599 p = PyString_AS_STRING(s);
3600 if (p[fill] == '+' || p[fill] == '-') {
3601 /* move sign to beginning of string */
3602 p[0] = p[fill];
3603 p[fill] = '0';
3604 }
3605
3606 return (PyObject*) s;
3607}
3608
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003609PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003610"S.isspace() -> bool\n\
3611\n\
3612Return True if all characters in S are whitespace\n\
3613and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003614
3615static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003616string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617{
Fred Drakeba096332000-07-09 07:04:36 +00003618 register const unsigned char *p
3619 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003620 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003621
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622 /* Shortcut for single character strings */
3623 if (PyString_GET_SIZE(self) == 1 &&
3624 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003625 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003626
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003627 /* Special case for empty strings */
3628 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003629 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003630
Guido van Rossum4c08d552000-03-10 22:55:18 +00003631 e = p + PyString_GET_SIZE(self);
3632 for (; p < e; p++) {
3633 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003634 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003636 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003637}
3638
3639
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003640PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003641"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003642\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003643Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003644and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003645
3646static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003647string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003648{
Fred Drakeba096332000-07-09 07:04:36 +00003649 register const unsigned char *p
3650 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003651 register const unsigned char *e;
3652
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003653 /* Shortcut for single character strings */
3654 if (PyString_GET_SIZE(self) == 1 &&
3655 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003656 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003657
3658 /* Special case for empty strings */
3659 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003660 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003661
3662 e = p + PyString_GET_SIZE(self);
3663 for (; p < e; p++) {
3664 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003665 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003666 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003667 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003668}
3669
3670
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003671PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003673\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003674Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003675and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003676
3677static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003678string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003679{
Fred Drakeba096332000-07-09 07:04:36 +00003680 register const unsigned char *p
3681 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003682 register const unsigned char *e;
3683
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003684 /* Shortcut for single character strings */
3685 if (PyString_GET_SIZE(self) == 1 &&
3686 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003687 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003688
3689 /* Special case for empty strings */
3690 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003691 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003692
3693 e = p + PyString_GET_SIZE(self);
3694 for (; p < e; p++) {
3695 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003696 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003697 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003698 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003699}
3700
3701
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003702PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003703"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003705Return True if all characters in S are digits\n\
3706and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707
3708static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003709string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710{
Fred Drakeba096332000-07-09 07:04:36 +00003711 register const unsigned char *p
3712 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003713 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715 /* Shortcut for single character strings */
3716 if (PyString_GET_SIZE(self) == 1 &&
3717 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003718 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003720 /* Special case for empty strings */
3721 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003722 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003723
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724 e = p + PyString_GET_SIZE(self);
3725 for (; p < e; p++) {
3726 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003727 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003729 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730}
3731
3732
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003733PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003734"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003736Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003737at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738
3739static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003740string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741{
Fred Drakeba096332000-07-09 07:04:36 +00003742 register const unsigned char *p
3743 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003744 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003745 int cased;
3746
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747 /* Shortcut for single character strings */
3748 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003749 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003751 /* Special case for empty strings */
3752 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003753 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003754
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755 e = p + PyString_GET_SIZE(self);
3756 cased = 0;
3757 for (; p < e; p++) {
3758 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003759 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760 else if (!cased && islower(*p))
3761 cased = 1;
3762 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003763 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003764}
3765
3766
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003767PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003768"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003770Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003771at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772
3773static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003774string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003775{
Fred Drakeba096332000-07-09 07:04:36 +00003776 register const unsigned char *p
3777 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003778 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779 int cased;
3780
Guido van Rossum4c08d552000-03-10 22:55:18 +00003781 /* Shortcut for single character strings */
3782 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003783 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003784
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003785 /* Special case for empty strings */
3786 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003787 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003788
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789 e = p + PyString_GET_SIZE(self);
3790 cased = 0;
3791 for (; p < e; p++) {
3792 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003793 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003794 else if (!cased && isupper(*p))
3795 cased = 1;
3796 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003797 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003798}
3799
3800
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003801PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003802"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003803\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003804Return True if S is a titlecased string and there is at least one\n\
3805character in S, i.e. uppercase characters may only follow uncased\n\
3806characters and lowercase characters only cased ones. Return False\n\
3807otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808
3809static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003810string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003811{
Fred Drakeba096332000-07-09 07:04:36 +00003812 register const unsigned char *p
3813 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003814 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003815 int cased, previous_is_cased;
3816
Guido van Rossum4c08d552000-03-10 22:55:18 +00003817 /* Shortcut for single character strings */
3818 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003819 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003820
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003821 /* Special case for empty strings */
3822 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003823 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003824
Guido van Rossum4c08d552000-03-10 22:55:18 +00003825 e = p + PyString_GET_SIZE(self);
3826 cased = 0;
3827 previous_is_cased = 0;
3828 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003829 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003830
3831 if (isupper(ch)) {
3832 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003833 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003834 previous_is_cased = 1;
3835 cased = 1;
3836 }
3837 else if (islower(ch)) {
3838 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003839 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003840 previous_is_cased = 1;
3841 cased = 1;
3842 }
3843 else
3844 previous_is_cased = 0;
3845 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003846 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003847}
3848
3849
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003850PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003851"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003852\n\
3853Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003854Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003855is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003856
Guido van Rossum4c08d552000-03-10 22:55:18 +00003857static PyObject*
3858string_splitlines(PyStringObject *self, PyObject *args)
3859{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003860 register Py_ssize_t i;
3861 register Py_ssize_t j;
3862 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003863 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003864 PyObject *list;
3865 PyObject *str;
3866 char *data;
3867
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003868 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003869 return NULL;
3870
3871 data = PyString_AS_STRING(self);
3872 len = PyString_GET_SIZE(self);
3873
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003874 /* This does not use the preallocated list because splitlines is
3875 usually run with hundreds of newlines. The overhead of
3876 switching between PyList_SET_ITEM and append causes about a
3877 2-3% slowdown for that common case. A smarter implementation
3878 could move the if check out, so the SET_ITEMs are done first
3879 and the appends only done when the prealloc buffer is full.
3880 That's too much work for little gain.*/
3881
Guido van Rossum4c08d552000-03-10 22:55:18 +00003882 list = PyList_New(0);
3883 if (!list)
3884 goto onError;
3885
3886 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003887 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003888
Guido van Rossum4c08d552000-03-10 22:55:18 +00003889 /* Find a line and append it */
3890 while (i < len && data[i] != '\n' && data[i] != '\r')
3891 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003892
3893 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003894 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003895 if (i < len) {
3896 if (data[i] == '\r' && i + 1 < len &&
3897 data[i+1] == '\n')
3898 i += 2;
3899 else
3900 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003901 if (keepends)
3902 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003903 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003904 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003905 j = i;
3906 }
3907 if (j < len) {
3908 SPLIT_APPEND(data, j, len);
3909 }
3910
3911 return list;
3912
3913 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003914 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003915 return NULL;
3916}
3917
3918#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003919#undef SPLIT_ADD
3920#undef MAX_PREALLOC
3921#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003922
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003923static PyObject *
3924string_getnewargs(PyStringObject *v)
3925{
Christian Heimese93237d2007-12-19 02:37:44 +00003926 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003927}
3928
Eric Smitha9f7d622008-02-17 19:46:49 +00003929
3930#include "stringlib/string_format.h"
3931
3932PyDoc_STRVAR(format__doc__,
3933"S.format(*args, **kwargs) -> unicode\n\
3934\n\
3935");
3936
3937PyDoc_STRVAR(p_format__doc__,
3938"S.__format__(format_spec) -> unicode\n\
3939\n\
3940");
3941
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003942
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003943static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003944string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003945 /* Counterparts of the obsolete stropmodule functions; except
3946 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003947 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3948 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003949 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003950 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3951 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003952 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3953 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3954 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3955 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3956 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3957 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3958 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003959 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3960 capitalize__doc__},
3961 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3962 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3963 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003964 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003965 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3966 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3967 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3968 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3969 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3970 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3971 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003972 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3973 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003974 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3975 startswith__doc__},
3976 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3977 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3978 swapcase__doc__},
3979 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3980 translate__doc__},
3981 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3982 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3983 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3984 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3985 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Eric Smitha9f7d622008-02-17 19:46:49 +00003986 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3987 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3988 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3989 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003990 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3991 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3992 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3993 expandtabs__doc__},
3994 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3995 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003996 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003997 {NULL, NULL} /* sentinel */
3998};
3999
Jeremy Hylton938ace62002-07-17 16:30:39 +00004000static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00004001str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4002
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004003static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00004004string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004005{
Tim Peters6d6c1a32001-08-02 04:15:00 +00004006 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00004007 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00004008
Guido van Rossumae960af2001-08-30 03:11:59 +00004009 if (type != &PyString_Type)
4010 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00004011 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4012 return NULL;
4013 if (x == NULL)
4014 return PyString_FromString("");
4015 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004016}
4017
Guido van Rossumae960af2001-08-30 03:11:59 +00004018static PyObject *
4019str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4020{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004021 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004022 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004023
4024 assert(PyType_IsSubtype(type, &PyString_Type));
4025 tmp = string_new(&PyString_Type, args, kwds);
4026 if (tmp == NULL)
4027 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004028 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004029 n = PyString_GET_SIZE(tmp);
4030 pnew = type->tp_alloc(type, n);
4031 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004032 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004033 ((PyStringObject *)pnew)->ob_shash =
4034 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004035 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004036 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004037 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004038 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004039}
4040
Guido van Rossumcacfc072002-05-24 19:01:59 +00004041static PyObject *
4042basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4043{
4044 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004045 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004046 return NULL;
4047}
4048
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004049static PyObject *
4050string_mod(PyObject *v, PyObject *w)
4051{
4052 if (!PyString_Check(v)) {
4053 Py_INCREF(Py_NotImplemented);
4054 return Py_NotImplemented;
4055 }
4056 return PyString_Format(v, w);
4057}
4058
/* Docstring used as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004061
/* Number-protocol table for str.  Only nb_remainder is populated (with
 * string_mod, i.e. the `%` operator); the slots listed before it are
 * explicitly 0 and the slots after it are omitted from the initializer.
 */
static PyNumberMethods string_as_number = {
    0,                  /*nb_add*/
    0,                  /*nb_subtract*/
    0,                  /*nb_multiply*/
    0,                  /*nb_divide*/
    string_mod,         /*nb_remainder*/
};
4069
4070
/* Type object for "basestring".
 *
 * Almost every slot is empty: the type derives from PyBaseObject_Type,
 * may itself be subclassed (Py_TPFLAGS_BASETYPE), and its tp_new is
 * basestring_new, which always raises TypeError.
 */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
4112
/* Docstring used as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004118
/* Type object for "str".
 *
 * Variable-size object: the fixed part is sizeof(PyStringObject) and each
 * item is one char.  The type derives from PyBaseString_Type, wires in the
 * number/sequence/mapping/buffer tables and string_methods defined in this
 * file, is created through string_new and released through PyObject_Del.
 */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
4162
4163void
Fred Drakeba096332000-07-09 07:04:36 +00004164PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004165{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004166 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004167 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004168 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004169 if (w == NULL || !PyString_Check(*pv)) {
4170 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004171 *pv = NULL;
4172 return;
4173 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004174 v = string_concat((PyStringObject *) *pv, w);
4175 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004176 *pv = v;
4177}
4178
Guido van Rossum013142a1994-08-30 08:19:36 +00004179void
Fred Drakeba096332000-07-09 07:04:36 +00004180PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004181{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004182 PyString_Concat(pv, w);
4183 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004184}
4185
4186
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004187/* The following function breaks the notion that strings are immutable:
4188 it changes the size of a string. We get away with this only if there
4189 is only one module referencing the object. You can also think of it
4190 as creating a new string object and destroying the old one, only
4191 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004192 already be known to some other part of the code...
4193 Note that if there's not enough memory to resize the string, the original
4194 string object at *pv is deallocated, *pv is set to NULL, an "out of
4195 memory" exception is set, and -1 is returned. Else (on success) 0 is
4196 returned, and the value in *pv may or may not be the same as on input.
4197 As always, an extra byte is allocated for a trailing \0 byte (newsize
4198 does *not* include that), and a trailing \0 byte is stored.
4199*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004200
int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyStringObject *sv;
    v = *pv;
    /* Only a string with exactly one reference, that is not interned, may
       be resized, and only to a non-negative size.  Anything else is a
       caller error: drop the object, clear *pv and report it. */
    if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
        PyString_CHECK_INTERNED(v)) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Retire the old object's reference bookkeeping before the memory
       block is reallocated; the block handed back is registered again
       with _Py_NewReference() below. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
    if (*pv == NULL) {
        /* Reallocation failed: free the original block and leave *pv
           NULL with a memory error set. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyStringObject *) *pv;
    Py_SIZE(sv) = newsize;
    /* Store the terminating NUL just past the newsize data bytes. */
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;  /* invalidate cached hash value */
    return 0;
}
Guido van Rossume5372401993-03-16 12:15:04 +00004231
4232/* Helpers for formatstring */
4233
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004234Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004235getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004236{
Thomas Wouters977485d2006-02-16 15:59:12 +00004237 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004238 if (argidx < arglen) {
4239 (*p_argidx)++;
4240 if (arglen < 0)
4241 return args;
4242 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004243 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004244 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004245 PyErr_SetString(PyExc_TypeError,
4246 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004247 return NULL;
4248}
4249
/* Format codes
 * Bit flags collected from the flag characters of a % format specifier;
 * each flag is a distinct bit so they can be OR-ed into one int.
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
4262
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004263Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004264formatfloat(char *buf, size_t buflen, int flags,
4265 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004266{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004267 /* fmt = '%#.' + `prec` + `type`
4268 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004269 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004270 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004271 x = PyFloat_AsDouble(v);
4272 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004273 PyErr_Format(PyExc_TypeError, "float argument required, "
Christian Heimese93237d2007-12-19 02:37:44 +00004274 "not %.200s", Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004275 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004276 }
Guido van Rossume5372401993-03-16 12:15:04 +00004277 if (prec < 0)
4278 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004279 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4280 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004281 /* Worst case length calc to ensure no buffer overrun:
4282
4283 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004284 fmt = %#.<prec>g
4285 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004286 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004287 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004288
4289 'f' formats:
4290 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4291 len = 1 + 50 + 1 + prec = 52 + prec
4292
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004293 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004294 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004295
4296 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004297 if (((type == 'g' || type == 'G') &&
4298 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004299 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004300 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004301 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004302 return -1;
4303 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004304 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4305 (flags&F_ALT) ? "#" : "",
4306 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004307 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004308 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004309}
4310
Tim Peters38fd5b62000-09-21 05:43:11 +00004311/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4312 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4313 * Python's regular ints.
4314 * Return value: a new PyString*, or NULL if error.
4315 * . *pbuf is set to point into it,
4316 * *plen set to the # of chars following that.
4317 * Caller must decref it when done using pbuf.
4318 * The string starting at *pbuf is of the form
4319 * "-"? ("0x" | "0X")? digit+
4320 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004321 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004322 * There will be at least prec digits, zero-filled on the left if
4323 * necessary to get that many.
4324 * val object to be converted
4325 * flags bitmask of format flags; only F_ALT is looked at
4326 * prec minimum number of digits; 0-fill on left if needed
4327 * type a character in [duoxX]; u acts the same as d
4328 *
4329 * CAUTION: o, x and X conversions on regular ints can never
4330 * produce a '-' sign, but can for Python's unbounded ints.
4331 */
4332PyObject*
4333_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4334 char **pbuf, int *plen)
4335{
4336 PyObject *result = NULL;
4337 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004338 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004339 int sign; /* 1 if '-', else 0 */
4340 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004341 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004342 int numdigits; /* len == numnondigits + numdigits */
4343 int numnondigits = 0;
4344
4345 switch (type) {
4346 case 'd':
4347 case 'u':
Christian Heimese93237d2007-12-19 02:37:44 +00004348 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004349 break;
4350 case 'o':
Christian Heimese93237d2007-12-19 02:37:44 +00004351 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004352 break;
4353 case 'x':
4354 case 'X':
4355 numnondigits = 2;
Christian Heimese93237d2007-12-19 02:37:44 +00004356 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004357 break;
4358 default:
4359 assert(!"'type' not in [duoxX]");
4360 }
4361 if (!result)
4362 return NULL;
4363
Neal Norwitz56423e52006-08-13 18:11:08 +00004364 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004365 if (!buf) {
4366 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004367 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004368 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004369
Tim Peters38fd5b62000-09-21 05:43:11 +00004370 /* To modify the string in-place, there can only be one reference. */
Christian Heimese93237d2007-12-19 02:37:44 +00004371 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004372 PyErr_BadInternalCall();
4373 return NULL;
4374 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004375 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004376 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004377 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4378 return NULL;
4379 }
4380 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004381 if (buf[len-1] == 'L') {
4382 --len;
4383 buf[len] = '\0';
4384 }
4385 sign = buf[0] == '-';
4386 numnondigits += sign;
4387 numdigits = len - numnondigits;
4388 assert(numdigits > 0);
4389
Tim Petersfff53252001-04-12 18:38:48 +00004390 /* Get rid of base marker unless F_ALT */
4391 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004392 /* Need to skip 0x, 0X or 0. */
4393 int skipped = 0;
4394 switch (type) {
4395 case 'o':
4396 assert(buf[sign] == '0');
4397 /* If 0 is only digit, leave it alone. */
4398 if (numdigits > 1) {
4399 skipped = 1;
4400 --numdigits;
4401 }
4402 break;
4403 case 'x':
4404 case 'X':
4405 assert(buf[sign] == '0');
4406 assert(buf[sign + 1] == 'x');
4407 skipped = 2;
4408 numnondigits -= 2;
4409 break;
4410 }
4411 if (skipped) {
4412 buf += skipped;
4413 len -= skipped;
4414 if (sign)
4415 buf[0] = '-';
4416 }
4417 assert(len == numnondigits + numdigits);
4418 assert(numdigits > 0);
4419 }
4420
4421 /* Fill with leading zeroes to meet minimum width. */
4422 if (prec > numdigits) {
4423 PyObject *r1 = PyString_FromStringAndSize(NULL,
4424 numnondigits + prec);
4425 char *b1;
4426 if (!r1) {
4427 Py_DECREF(result);
4428 return NULL;
4429 }
4430 b1 = PyString_AS_STRING(r1);
4431 for (i = 0; i < numnondigits; ++i)
4432 *b1++ = *buf++;
4433 for (i = 0; i < prec - numdigits; i++)
4434 *b1++ = '0';
4435 for (i = 0; i < numdigits; i++)
4436 *b1++ = *buf++;
4437 *b1 = '\0';
4438 Py_DECREF(result);
4439 result = r1;
4440 buf = PyString_AS_STRING(result);
4441 len = numnondigits + prec;
4442 }
4443
4444 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004445 if (type == 'X') {
4446 /* Need to convert all lower case letters to upper case.
4447 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004448 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004449 if (buf[i] >= 'a' && buf[i] <= 'x')
4450 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004451 }
4452 *pbuf = buf;
4453 *plen = len;
4454 return result;
4455}
4456
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004457Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004458formatint(char *buf, size_t buflen, int flags,
4459 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004460{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004461 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004462 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4463 + 1 + 1 = 24 */
4464 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004465 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004466 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004467
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004468 x = PyInt_AsLong(v);
4469 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004470 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00004471 Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004472 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004473 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004474 if (x < 0 && type == 'u') {
4475 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004476 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004477 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4478 sign = "-";
4479 else
4480 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004481 if (prec < 0)
4482 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004483
4484 if ((flags & F_ALT) &&
4485 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004486 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004487 * of issues that cause pain:
4488 * - when 0 is being converted, the C standard leaves off
4489 * the '0x' or '0X', which is inconsistent with other
4490 * %#x/%#X conversions and inconsistent with Python's
4491 * hex() function
4492 * - there are platforms that violate the standard and
4493 * convert 0 with the '0x' or '0X'
4494 * (Metrowerks, Compaq Tru64)
4495 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004496 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004497 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004498 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004499 * We can achieve the desired consistency by inserting our
4500 * own '0x' or '0X' prefix, and substituting %x/%X in place
4501 * of %#x/%#X.
4502 *
4503 * Note that this is the same approach as used in
4504 * formatint() in unicodeobject.c
4505 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004506 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4507 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004508 }
4509 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004510 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4511 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004512 prec, type);
4513 }
4514
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004515 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4516 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004517 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004518 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004519 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004520 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004521 return -1;
4522 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004523 if (sign[0])
4524 PyOS_snprintf(buf, buflen, fmt, -x);
4525 else
4526 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004527 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004528}
4529
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004530Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004531formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004532{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004533 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004534 if (PyString_Check(v)) {
4535 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004536 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004537 }
4538 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004539 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004540 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004541 }
4542 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004543 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004544}
4545
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004546/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4547
4548 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4549 chars are formatted. XXX This is a magic number. Each formatting
4550 routine does bounds checking to ensure no overflow, but a better
4551 solution may be to malloc a buffer of appropriate size for each
4552 format. For now, the current solution is sufficient.
4553*/
4554#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004555
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004556PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004557PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004558{
4559 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004560 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004561 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004562 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004563 PyObject *result, *orig_args;
4564#ifdef Py_USING_UNICODE
4565 PyObject *v, *w;
4566#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004567 PyObject *dict = NULL;
4568 if (format == NULL || !PyString_Check(format) || args == NULL) {
4569 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004570 return NULL;
4571 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004572 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004573 fmt = PyString_AS_STRING(format);
4574 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004575 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004576 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004577 if (result == NULL)
4578 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004579 res = PyString_AsString(result);
4580 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004581 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004582 argidx = 0;
4583 }
4584 else {
4585 arglen = -1;
4586 argidx = -2;
4587 }
Christian Heimese93237d2007-12-19 02:37:44 +00004588 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004589 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004590 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004591 while (--fmtcnt >= 0) {
4592 if (*fmt != '%') {
4593 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004594 rescnt = fmtcnt + 100;
4595 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004596 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004597 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004598 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004599 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004600 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004601 }
4602 *res++ = *fmt++;
4603 }
4604 else {
4605 /* Got a format specifier */
4606 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004607 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004608 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004609 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004610 int fill;
Facundo Batistac11cecf2008-02-24 03:17:21 +00004611 int isnumok;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004612 PyObject *v = NULL;
4613 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004614 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004615 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004616 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004617 char formatbuf[FORMATBUFLEN];
4618 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004619#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004620 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004621 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004622#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004623
Guido van Rossumda9c2711996-12-05 21:58:58 +00004624 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004625 if (*fmt == '(') {
4626 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004627 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004628 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004629 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004630
4631 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004632 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004633 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004634 goto error;
4635 }
4636 ++fmt;
4637 --fmtcnt;
4638 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004639 /* Skip over balanced parentheses */
4640 while (pcount > 0 && --fmtcnt >= 0) {
4641 if (*fmt == ')')
4642 --pcount;
4643 else if (*fmt == '(')
4644 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004645 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004646 }
4647 keylen = fmt - keystart - 1;
4648 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004649 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004650 "incomplete format key");
4651 goto error;
4652 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004653 key = PyString_FromStringAndSize(keystart,
4654 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004655 if (key == NULL)
4656 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004657 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004658 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004659 args_owned = 0;
4660 }
4661 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004662 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004663 if (args == NULL) {
4664 goto error;
4665 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004666 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004667 arglen = -1;
4668 argidx = -2;
4669 }
Guido van Rossume5372401993-03-16 12:15:04 +00004670 while (--fmtcnt >= 0) {
4671 switch (c = *fmt++) {
4672 case '-': flags |= F_LJUST; continue;
4673 case '+': flags |= F_SIGN; continue;
4674 case ' ': flags |= F_BLANK; continue;
4675 case '#': flags |= F_ALT; continue;
4676 case '0': flags |= F_ZERO; continue;
4677 }
4678 break;
4679 }
4680 if (c == '*') {
4681 v = getnextarg(args, arglen, &argidx);
4682 if (v == NULL)
4683 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004684 if (!PyInt_Check(v)) {
4685 PyErr_SetString(PyExc_TypeError,
4686 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004687 goto error;
4688 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004689 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004690 if (width < 0) {
4691 flags |= F_LJUST;
4692 width = -width;
4693 }
Guido van Rossume5372401993-03-16 12:15:04 +00004694 if (--fmtcnt >= 0)
4695 c = *fmt++;
4696 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004697 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004698 width = c - '0';
4699 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004700 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004701 if (!isdigit(c))
4702 break;
4703 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004704 PyErr_SetString(
4705 PyExc_ValueError,
4706 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004707 goto error;
4708 }
4709 width = width*10 + (c - '0');
4710 }
4711 }
4712 if (c == '.') {
4713 prec = 0;
4714 if (--fmtcnt >= 0)
4715 c = *fmt++;
4716 if (c == '*') {
4717 v = getnextarg(args, arglen, &argidx);
4718 if (v == NULL)
4719 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004720 if (!PyInt_Check(v)) {
4721 PyErr_SetString(
4722 PyExc_TypeError,
4723 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004724 goto error;
4725 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004726 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004727 if (prec < 0)
4728 prec = 0;
4729 if (--fmtcnt >= 0)
4730 c = *fmt++;
4731 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004732 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004733 prec = c - '0';
4734 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004735 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004736 if (!isdigit(c))
4737 break;
4738 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004739 PyErr_SetString(
4740 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004741 "prec too big");
4742 goto error;
4743 }
4744 prec = prec*10 + (c - '0');
4745 }
4746 }
4747 } /* prec */
4748 if (fmtcnt >= 0) {
4749 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004750 if (--fmtcnt >= 0)
4751 c = *fmt++;
4752 }
4753 }
4754 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004755 PyErr_SetString(PyExc_ValueError,
4756 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004757 goto error;
4758 }
4759 if (c != '%') {
4760 v = getnextarg(args, arglen, &argidx);
4761 if (v == NULL)
4762 goto error;
4763 }
4764 sign = 0;
4765 fill = ' ';
4766 switch (c) {
4767 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004768 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004769 len = 1;
4770 break;
4771 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004772#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004773 if (PyUnicode_Check(v)) {
4774 fmt = fmt_start;
4775 argidx = argidx_start;
4776 goto unicode;
4777 }
Georg Brandld45014b2005-10-01 17:06:00 +00004778#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004779 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004780#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004781 if (temp != NULL && PyUnicode_Check(temp)) {
4782 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004783 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004784 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004785 goto unicode;
4786 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004787#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004788 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004789 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004790 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004791 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004792 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004793 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004794 if (!PyString_Check(temp)) {
4795 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004796 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004797 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004798 goto error;
4799 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004800 pbuf = PyString_AS_STRING(temp);
4801 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004802 if (prec >= 0 && len > prec)
4803 len = prec;
4804 break;
4805 case 'i':
4806 case 'd':
4807 case 'u':
4808 case 'o':
4809 case 'x':
4810 case 'X':
4811 if (c == 'i')
4812 c = 'd';
Facundo Batistac11cecf2008-02-24 03:17:21 +00004813 isnumok = 0;
4814 if (PyNumber_Check(v)) {
4815 PyObject *iobj=NULL;
4816
4817 if (PyInt_Check(v) || (PyLong_Check(v))) {
4818 iobj = v;
4819 Py_INCREF(iobj);
4820 }
4821 else {
4822 iobj = PyNumber_Int(v);
4823 if (iobj==NULL) iobj = PyNumber_Long(v);
4824 }
4825 if (iobj!=NULL) {
4826 if (PyInt_Check(iobj)) {
4827 isnumok = 1;
4828 pbuf = formatbuf;
4829 len = formatint(pbuf,
4830 sizeof(formatbuf),
4831 flags, prec, c, iobj);
4832 Py_DECREF(iobj);
4833 if (len < 0)
4834 goto error;
4835 sign = 1;
4836 }
4837 else if (PyLong_Check(iobj)) {
4838 int ilen;
4839
4840 isnumok = 1;
4841 temp = _PyString_FormatLong(iobj, flags,
4842 prec, c, &pbuf, &ilen);
4843 Py_DECREF(iobj);
4844 len = ilen;
4845 if (!temp)
4846 goto error;
4847 sign = 1;
4848 }
4849 else {
4850 Py_DECREF(iobj);
4851 }
4852 }
Guido van Rossum4acdc231997-01-29 06:00:24 +00004853 }
Facundo Batistac11cecf2008-02-24 03:17:21 +00004854 if (!isnumok) {
4855 PyErr_Format(PyExc_TypeError,
4856 "%%%c format: a number is required, "
4857 "not %.200s", c, Py_TYPE(v)->tp_name);
4858 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004859 }
4860 if (flags & F_ZERO)
4861 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004862 break;
4863 case 'e':
4864 case 'E':
4865 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004866 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004867 case 'g':
4868 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004869 if (c == 'F')
4870 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004871 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004872 len = formatfloat(pbuf, sizeof(formatbuf),
4873 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004874 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004875 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004876 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004877 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004878 fill = '0';
4879 break;
4880 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004881#ifdef Py_USING_UNICODE
4882 if (PyUnicode_Check(v)) {
4883 fmt = fmt_start;
4884 argidx = argidx_start;
4885 goto unicode;
4886 }
4887#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004888 pbuf = formatbuf;
4889 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004890 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004891 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004892 break;
4893 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004894 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004895 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004896 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004897 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004898 (Py_ssize_t)(fmt - 1 -
4899 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004900 goto error;
4901 }
4902 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004903 if (*pbuf == '-' || *pbuf == '+') {
4904 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004905 len--;
4906 }
4907 else if (flags & F_SIGN)
4908 sign = '+';
4909 else if (flags & F_BLANK)
4910 sign = ' ';
4911 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004912 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004913 }
4914 if (width < len)
4915 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004916 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004917 reslen -= rescnt;
4918 rescnt = width + fmtcnt + 100;
4919 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004920 if (reslen < 0) {
4921 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004922 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004923 return PyErr_NoMemory();
4924 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004925 if (_PyString_Resize(&result, reslen) < 0) {
4926 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004927 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004928 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004929 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004930 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004931 }
4932 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004933 if (fill != ' ')
4934 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004935 rescnt--;
4936 if (width > len)
4937 width--;
4938 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004939 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4940 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004941 assert(pbuf[1] == c);
4942 if (fill != ' ') {
4943 *res++ = *pbuf++;
4944 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004945 }
Tim Petersfff53252001-04-12 18:38:48 +00004946 rescnt -= 2;
4947 width -= 2;
4948 if (width < 0)
4949 width = 0;
4950 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004951 }
4952 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004953 do {
4954 --rescnt;
4955 *res++ = fill;
4956 } while (--width > len);
4957 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004958 if (fill == ' ') {
4959 if (sign)
4960 *res++ = sign;
4961 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004962 (c == 'x' || c == 'X')) {
4963 assert(pbuf[0] == '0');
4964 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004965 *res++ = *pbuf++;
4966 *res++ = *pbuf++;
4967 }
4968 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004969 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004970 res += len;
4971 rescnt -= len;
4972 while (--width >= len) {
4973 --rescnt;
4974 *res++ = ' ';
4975 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004976 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004977 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004978 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004979 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004980 goto error;
4981 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004982 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004983 } /* '%' */
4984 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004985 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004986 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004987 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004988 goto error;
4989 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004990 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004991 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004992 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004993 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004994 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004995
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004996#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004997 unicode:
4998 if (args_owned) {
4999 Py_DECREF(args);
5000 args_owned = 0;
5001 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00005002 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00005003 if (PyTuple_Check(orig_args) && argidx > 0) {
5004 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00005005 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00005006 v = PyTuple_New(n);
5007 if (v == NULL)
5008 goto error;
5009 while (--n >= 0) {
5010 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5011 Py_INCREF(w);
5012 PyTuple_SET_ITEM(v, n, w);
5013 }
5014 args = v;
5015 } else {
5016 Py_INCREF(orig_args);
5017 args = orig_args;
5018 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005019 args_owned = 1;
5020 /* Take what we have of the result and let the Unicode formatting
5021 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00005022 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005023 if (_PyString_Resize(&result, rescnt))
5024 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00005025 fmtcnt = PyString_GET_SIZE(format) - \
5026 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005027 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5028 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00005029 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005030 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00005031 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005032 if (v == NULL)
5033 goto error;
5034 /* Paste what we have (result) to what the Unicode formatting
5035 function returned (v) and return the result (or error) */
5036 w = PyUnicode_Concat(result, v);
5037 Py_DECREF(result);
5038 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00005039 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005040 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00005041#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00005042
Guido van Rossume5372401993-03-16 12:15:04 +00005043 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005044 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005045 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005046 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005047 }
Guido van Rossume5372401993-03-16 12:15:04 +00005048 return NULL;
5049}
Guido van Rossum2a61e741997-01-18 07:55:05 +00005050
Guido van Rossum2a61e741997-01-18 07:55:05 +00005051void
Fred Drakeba096332000-07-09 07:04:36 +00005052PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005053{
5054 register PyStringObject *s = (PyStringObject *)(*p);
5055 PyObject *t;
5056 if (s == NULL || !PyString_Check(s))
5057 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005058 /* If it's a string subclass, we don't really know what putting
5059 it in the interned dict might do. */
5060 if (!PyString_CheckExact(s))
5061 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005062 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005063 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005064 if (interned == NULL) {
5065 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005066 if (interned == NULL) {
5067 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005068 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005069 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005070 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005071 t = PyDict_GetItem(interned, (PyObject *)s);
5072 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005073 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005074 Py_DECREF(*p);
5075 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005076 return;
5077 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005078
Armin Rigo79f7ad22004-08-07 19:27:39 +00005079 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005080 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005081 return;
5082 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005083 /* The two references in interned are not counted by refcnt.
5084 The string deallocator will take care of this */
Christian Heimese93237d2007-12-19 02:37:44 +00005085 Py_REFCNT(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005086 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005087}
5088
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005089void
5090PyString_InternImmortal(PyObject **p)
5091{
5092 PyString_InternInPlace(p);
5093 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5094 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5095 Py_INCREF(*p);
5096 }
5097}
5098
Guido van Rossum2a61e741997-01-18 07:55:05 +00005099
5100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005101PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005102{
5103 PyObject *s = PyString_FromString(cp);
5104 if (s == NULL)
5105 return NULL;
5106 PyString_InternInPlace(&s);
5107 return s;
5108}
5109
Guido van Rossum8cf04761997-08-02 02:57:45 +00005110void
Fred Drakeba096332000-07-09 07:04:36 +00005111PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005112{
5113 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005114 for (i = 0; i < UCHAR_MAX + 1; i++) {
5115 Py_XDECREF(characters[i]);
5116 characters[i] = NULL;
5117 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005118 Py_XDECREF(nullstring);
5119 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005120}
Barry Warsawa903ad982001-02-23 16:40:48 +00005121
Barry Warsawa903ad982001-02-23 16:40:48 +00005122void _Py_ReleaseInternedStrings(void)
5123{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005124 PyObject *keys;
5125 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005126 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005127 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005128
5129 if (interned == NULL || !PyDict_Check(interned))
5130 return;
5131 keys = PyDict_Keys(interned);
5132 if (keys == NULL || !PyList_Check(keys)) {
5133 PyErr_Clear();
5134 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005135 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005136
5137 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5138 detector, interned strings are not forcibly deallocated; rather, we
5139 give them their stolen references back, and then clear and DECREF
5140 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005141
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005142 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005143 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5144 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005145 for (i = 0; i < n; i++) {
5146 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5147 switch (s->ob_sstate) {
5148 case SSTATE_NOT_INTERNED:
5149 /* XXX Shouldn't happen */
5150 break;
5151 case SSTATE_INTERNED_IMMORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005152 Py_REFCNT(s) += 1;
5153 immortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005154 break;
5155 case SSTATE_INTERNED_MORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005156 Py_REFCNT(s) += 2;
5157 mortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005158 break;
5159 default:
5160 Py_FatalError("Inconsistent interned string state.");
5161 }
5162 s->ob_sstate = SSTATE_NOT_INTERNED;
5163 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005164 fprintf(stderr, "total size of all interned strings: "
5165 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5166 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005167 Py_DECREF(keys);
5168 PyDict_Clear(interned);
5169 Py_DECREF(interned);
5170 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005171}