blob: 9d518541ec7f9760686240b58870a16c696b3cb9 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Eric Smitha9f7d622008-02-17 19:46:49 +00007#include "formatter_string.h"
8
Guido van Rossum013142a1994-08-30 08:19:36 +00009#include <ctype.h>
10
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000011#ifdef COUNT_ALLOCS
12int null_strings, one_strings;
13#endif
14
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
Tim Petersae1d0c92006-03-17 03:29:34 +000023 Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000024 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000028/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000029 For both PyString_FromString() and PyString_FromStringAndSize(), the
30 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000033 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000034 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000035
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000036 For PyString_FromStringAndSize(), the parameter `str' is
37 either NULL or else points to a string containing at least `size' bytes.
38 For PyString_FromStringAndSize(), the string in the `str' parameter does
39 not have to be null-terminated. (Therefore it is safe to construct a
40 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
41 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
42 bytes (setting the last byte to the null terminating character) and you can
43 fill in the data yourself. If `str' is non-NULL then the resulting
44 PyString object must be treated as immutable and you must not fill in nor
45 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000046
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000047 The PyObject member `op->ob_size', which denotes the number of "extra
48 items" in a variable-size object, will contain the number of bytes
49 allocated for string data, not counting the null terminating character. It
50 is therefore equal to the `size' parameter (for
51 PyString_FromStringAndSize()) or the length of the string in the `str'
52 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000053*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000055PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Tim Peters9e897f42001-05-09 07:37:07 +000057 register PyStringObject *op;
Gregory P. Smithc00eb732008-04-09 23:16:37 +000058 if (size < 0) {
59 PyErr_SetString(PyExc_SystemError,
60 "Negative size passed to PyString_FromStringAndSize");
61 return NULL;
62 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 if (size == 0 && (op = nullstring) != NULL) {
64#ifdef COUNT_ALLOCS
65 null_strings++;
66#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 Py_INCREF(op);
68 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 if (size == 1 && str != NULL &&
71 (op = characters[*str & UCHAR_MAX]) != NULL)
72 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073#ifdef COUNT_ALLOCS
74 one_strings++;
75#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 Py_INCREF(op);
77 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000079
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000080 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000081 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000083 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000084 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000086 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000088 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000089 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000090 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000098 PyObject *t = (PyObject *)op;
99 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000100 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105}
106
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000108PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109{
Tim Peters62de65b2001-12-06 20:29:32 +0000110 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000111 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000112
113 assert(str != NULL);
114 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000115 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000116 PyErr_SetString(PyExc_OverflowError,
117 "string is too long for a Python string");
118 return NULL;
119 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 if (size == 0 && (op = nullstring) != NULL) {
121#ifdef COUNT_ALLOCS
122 null_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
127 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
128#ifdef COUNT_ALLOCS
129 one_strings++;
130#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000135 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000136 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000137 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000139 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000141 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000142 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000143 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000151 PyObject *t = (PyObject *)op;
152 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000153 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000157 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000158}
159
Barry Warsawdadace02001-08-24 18:32:06 +0000160PyObject *
161PyString_FromFormatV(const char *format, va_list vargs)
162{
Tim Petersc15c4f12001-10-02 21:32:07 +0000163 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000164 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000165 const char* f;
166 char *s;
167 PyObject* string;
168
Tim Petersc15c4f12001-10-02 21:32:07 +0000169#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000170 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000171#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#ifdef __va_copy
173 __va_copy(count, vargs);
174#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000175 count = vargs;
176#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000177#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000178 /* step 1: figure out how large a buffer we need */
179 for (f = format; *f; f++) {
180 if (*f == '%') {
181 const char* p = f;
182 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
183 ;
184
Tim Peters8931ff12006-05-13 23:28:20 +0000185 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
186 * they don't affect the amount of space we reserve.
187 */
188 if ((*f == 'l' || *f == 'z') &&
189 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000190 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000191
Barry Warsawdadace02001-08-24 18:32:06 +0000192 switch (*f) {
193 case 'c':
194 (void)va_arg(count, int);
195 /* fall through... */
196 case '%':
197 n++;
198 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000199 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000200 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000201 /* 20 bytes is enough to hold a 64-bit
202 integer. Decimal takes the most space.
203 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000204 n += 20;
205 break;
206 case 's':
207 s = va_arg(count, char*);
208 n += strlen(s);
209 break;
210 case 'p':
211 (void) va_arg(count, int);
212 /* maximum 64-bit pointer representation:
213 * 0xffffffffffffffff
214 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000215 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000216 */
217 n += 19;
218 break;
219 default:
220 /* if we stumble upon an unknown
221 formatting code, copy the rest of
222 the format string to the output
223 string. (we cannot just skip the
224 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000225 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000226 n += strlen(p);
227 goto expand;
228 }
229 } else
230 n++;
231 }
232 expand:
233 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000234 /* Since we've analyzed how much space we need for the worst case,
235 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000236 string = PyString_FromStringAndSize(NULL, n);
237 if (!string)
238 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000239
Barry Warsawdadace02001-08-24 18:32:06 +0000240 s = PyString_AsString(string);
241
242 for (f = format; *f; f++) {
243 if (*f == '%') {
244 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000245 Py_ssize_t i;
246 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000247 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000248 /* parse the width.precision part (we're only
249 interested in the precision value, if any) */
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 if (*f == '.') {
254 f++;
255 n = 0;
256 while (isdigit(Py_CHARMASK(*f)))
257 n = (n*10) + *f++ - '0';
258 }
259 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
260 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000261 /* handle the long flag, but only for %ld and %lu.
262 others can be added when necessary. */
263 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000264 longflag = 1;
265 ++f;
266 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000267 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000268 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000269 size_tflag = 1;
270 ++f;
271 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000272
Barry Warsawdadace02001-08-24 18:32:06 +0000273 switch (*f) {
274 case 'c':
275 *s++ = va_arg(vargs, int);
276 break;
277 case 'd':
278 if (longflag)
279 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000280 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000281 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
282 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000283 else
284 sprintf(s, "%d", va_arg(vargs, int));
285 s += strlen(s);
286 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000287 case 'u':
288 if (longflag)
289 sprintf(s, "%lu",
290 va_arg(vargs, unsigned long));
291 else if (size_tflag)
292 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
293 va_arg(vargs, size_t));
294 else
295 sprintf(s, "%u",
296 va_arg(vargs, unsigned int));
297 s += strlen(s);
298 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000299 case 'i':
300 sprintf(s, "%i", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 'x':
304 sprintf(s, "%x", va_arg(vargs, int));
305 s += strlen(s);
306 break;
307 case 's':
308 p = va_arg(vargs, char*);
309 i = strlen(p);
310 if (n > 0 && i > n)
311 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000312 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000313 s += i;
314 break;
315 case 'p':
316 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000317 /* %p is ill-defined: ensure leading 0x. */
318 if (s[1] == 'X')
319 s[1] = 'x';
320 else if (s[1] != 'x') {
321 memmove(s+2, s, strlen(s)+1);
322 s[0] = '0';
323 s[1] = 'x';
324 }
Barry Warsawdadace02001-08-24 18:32:06 +0000325 s += strlen(s);
326 break;
327 case '%':
328 *s++ = '%';
329 break;
330 default:
331 strcpy(s, p);
332 s += strlen(s);
333 goto end;
334 }
335 } else
336 *s++ = *f;
337 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000340 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000341 return string;
342}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000343
Barry Warsawdadace02001-08-24 18:32:06 +0000344PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000345PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000346{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000347 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000348 va_list vargs;
349
350#ifdef HAVE_STDARG_PROTOTYPES
351 va_start(vargs, format);
352#else
353 va_start(vargs);
354#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000355 ret = PyString_FromFormatV(format, vargs);
356 va_end(vargs);
357 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000358}
359
360
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000361PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000362 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000363 const char *encoding,
364 const char *errors)
365{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000366 PyObject *v, *str;
367
368 str = PyString_FromStringAndSize(s, size);
369 if (str == NULL)
370 return NULL;
371 v = PyString_AsDecodedString(str, encoding, errors);
372 Py_DECREF(str);
373 return v;
374}
375
376PyObject *PyString_AsDecodedObject(PyObject *str,
377 const char *encoding,
378 const char *errors)
379{
380 PyObject *v;
381
382 if (!PyString_Check(str)) {
383 PyErr_BadArgument();
384 goto onError;
385 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000386
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387 if (encoding == NULL) {
388#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000390#else
391 PyErr_SetString(PyExc_ValueError, "no encoding specified");
392 goto onError;
393#endif
394 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395
396 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000397 v = PyCodec_Decode(str, encoding, errors);
398 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000400
401 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000402
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000404 return NULL;
405}
406
407PyObject *PyString_AsDecodedString(PyObject *str,
408 const char *encoding,
409 const char *errors)
410{
411 PyObject *v;
412
413 v = PyString_AsDecodedObject(str, encoding, errors);
414 if (v == NULL)
415 goto onError;
416
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000417#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 /* Convert Unicode to a string using the default encoding */
419 if (PyUnicode_Check(v)) {
420 PyObject *temp = v;
421 v = PyUnicode_AsEncodedString(v, NULL, NULL);
422 Py_DECREF(temp);
423 if (v == NULL)
424 goto onError;
425 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000426#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000427 if (!PyString_Check(v)) {
428 PyErr_Format(PyExc_TypeError,
429 "decoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000430 Py_TYPE(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000431 Py_DECREF(v);
432 goto onError;
433 }
434
435 return v;
436
437 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 return NULL;
439}
440
441PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000442 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 const char *encoding,
444 const char *errors)
445{
446 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000447
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000448 str = PyString_FromStringAndSize(s, size);
449 if (str == NULL)
450 return NULL;
451 v = PyString_AsEncodedString(str, encoding, errors);
452 Py_DECREF(str);
453 return v;
454}
455
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 const char *encoding,
458 const char *errors)
459{
460 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000461
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 if (!PyString_Check(str)) {
463 PyErr_BadArgument();
464 goto onError;
465 }
466
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467 if (encoding == NULL) {
468#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000470#else
471 PyErr_SetString(PyExc_ValueError, "no encoding specified");
472 goto onError;
473#endif
474 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475
476 /* Encode via the codec registry */
477 v = PyCodec_Encode(str, encoding, errors);
478 if (v == NULL)
479 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000480
481 return v;
482
483 onError:
484 return NULL;
485}
486
487PyObject *PyString_AsEncodedString(PyObject *str,
488 const char *encoding,
489 const char *errors)
490{
491 PyObject *v;
492
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000493 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494 if (v == NULL)
495 goto onError;
496
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000497#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000498 /* Convert Unicode to a string using the default encoding */
499 if (PyUnicode_Check(v)) {
500 PyObject *temp = v;
501 v = PyUnicode_AsEncodedString(v, NULL, NULL);
502 Py_DECREF(temp);
503 if (v == NULL)
504 goto onError;
505 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000506#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000507 if (!PyString_Check(v)) {
508 PyErr_Format(PyExc_TypeError,
509 "encoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000510 Py_TYPE(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 Py_DECREF(v);
512 goto onError;
513 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000514
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000515 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000516
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000517 onError:
518 return NULL;
519}
520
Guido van Rossum234f9421993-06-17 12:35:49 +0000521static void
Fred Drakeba096332000-07-09 07:04:36 +0000522string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000524 switch (PyString_CHECK_INTERNED(op)) {
525 case SSTATE_NOT_INTERNED:
526 break;
527
528 case SSTATE_INTERNED_MORTAL:
529 /* revive dead object temporarily for DelItem */
Christian Heimese93237d2007-12-19 02:37:44 +0000530 Py_REFCNT(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000531 if (PyDict_DelItem(interned, op) != 0)
532 Py_FatalError(
533 "deletion of interned string failed");
534 break;
535
536 case SSTATE_INTERNED_IMMORTAL:
537 Py_FatalError("Immortal interned string died.");
538
539 default:
540 Py_FatalError("Inconsistent interned string state.");
541 }
Christian Heimese93237d2007-12-19 02:37:44 +0000542 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000543}
544
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545/* Unescape a backslash-escaped string. If unicode is non-zero,
546 the string is a u-literal. If recode_encoding is non-zero,
547 the string is UTF-8 encoded and should be re-encoded in the
548 specified encoding. */
549
550PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000551 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000552 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 const char *recode_encoding)
555{
556 int c;
557 char *p, *buf;
558 const char *end;
559 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000560 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000561 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000562 if (v == NULL)
563 return NULL;
564 p = buf = PyString_AsString(v);
565 end = s + len;
566 while (s < end) {
567 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000568 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569#ifdef Py_USING_UNICODE
570 if (recode_encoding && (*s & 0x80)) {
571 PyObject *u, *w;
572 char *r;
573 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000574 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575 t = s;
576 /* Decode non-ASCII bytes as UTF-8. */
577 while (t < end && (*t & 0x80)) t++;
578 u = PyUnicode_DecodeUTF8(s, t - s, errors);
579 if(!u) goto failed;
580
581 /* Recode them in target encoding. */
582 w = PyUnicode_AsEncodedString(
583 u, recode_encoding, errors);
584 Py_DECREF(u);
585 if (!w) goto failed;
586
587 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000588 assert(PyString_Check(w));
589 r = PyString_AS_STRING(w);
590 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000591 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000592 p += rn;
593 Py_DECREF(w);
594 s = t;
595 } else {
596 *p++ = *s++;
597 }
598#else
599 *p++ = *s++;
600#endif
601 continue;
602 }
603 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000604 if (s==end) {
605 PyErr_SetString(PyExc_ValueError,
606 "Trailing \\ in string");
607 goto failed;
608 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000609 switch (*s++) {
610 /* XXX This assumes ASCII! */
611 case '\n': break;
612 case '\\': *p++ = '\\'; break;
613 case '\'': *p++ = '\''; break;
614 case '\"': *p++ = '\"'; break;
615 case 'b': *p++ = '\b'; break;
616 case 'f': *p++ = '\014'; break; /* FF */
617 case 't': *p++ = '\t'; break;
618 case 'n': *p++ = '\n'; break;
619 case 'r': *p++ = '\r'; break;
620 case 'v': *p++ = '\013'; break; /* VT */
621 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
622 case '0': case '1': case '2': case '3':
623 case '4': case '5': case '6': case '7':
624 c = s[-1] - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000625 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000626 c = (c<<3) + *s++ - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000627 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 c = (c<<3) + *s++ - '0';
629 }
630 *p++ = c;
631 break;
632 case 'x':
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000633 if (s+1 < end &&
634 isxdigit(Py_CHARMASK(s[0])) &&
635 isxdigit(Py_CHARMASK(s[1])))
636 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000637 unsigned int x = 0;
638 c = Py_CHARMASK(*s);
639 s++;
640 if (isdigit(c))
641 x = c - '0';
642 else if (islower(c))
643 x = 10 + c - 'a';
644 else
645 x = 10 + c - 'A';
646 x = x << 4;
647 c = Py_CHARMASK(*s);
648 s++;
649 if (isdigit(c))
650 x += c - '0';
651 else if (islower(c))
652 x += 10 + c - 'a';
653 else
654 x += 10 + c - 'A';
655 *p++ = x;
656 break;
657 }
658 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000659 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000660 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000661 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000662 }
663 if (strcmp(errors, "replace") == 0) {
664 *p++ = '?';
665 } else if (strcmp(errors, "ignore") == 0)
666 /* do nothing */;
667 else {
668 PyErr_Format(PyExc_ValueError,
669 "decoding error; "
670 "unknown error handling code: %.400s",
671 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000672 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 }
674#ifndef Py_USING_UNICODE
675 case 'u':
676 case 'U':
677 case 'N':
678 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000679 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000680 "Unicode escapes not legal "
681 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000682 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684#endif
685 default:
686 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000687 s--;
688 goto non_esc; /* an arbitry number of unescaped
689 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000690 }
691 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000692 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000694 return v;
695 failed:
696 Py_DECREF(v);
697 return NULL;
698}
699
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000700/* -------------------------------------------------------------------- */
701/* object api */
702
Martin v. Löwis18e16552006-02-15 17:27:45 +0000703static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704string_getsize(register PyObject *op)
705{
706 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000707 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000708 if (PyString_AsStringAndSize(op, &s, &len))
709 return -1;
710 return len;
711}
712
713static /*const*/ char *
714string_getbuffer(register PyObject *op)
715{
716 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (PyString_AsStringAndSize(op, &s, &len))
719 return NULL;
720 return s;
721}
722
Martin v. Löwis18e16552006-02-15 17:27:45 +0000723Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getsize(op);
Christian Heimese93237d2007-12-19 02:37:44 +0000728 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
731/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000732PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000733{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000734 if (!PyString_Check(op))
735 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000736 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000737}
738
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000739int
740PyString_AsStringAndSize(register PyObject *obj,
741 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000742 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743{
744 if (s == NULL) {
745 PyErr_BadInternalCall();
746 return -1;
747 }
748
749 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000750#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 if (PyUnicode_Check(obj)) {
752 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
753 if (obj == NULL)
754 return -1;
755 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000756 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000757#endif
758 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000759 PyErr_Format(PyExc_TypeError,
760 "expected string or Unicode object, "
Christian Heimese93237d2007-12-19 02:37:44 +0000761 "%.200s found", Py_TYPE(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 return -1;
763 }
764 }
765
766 *s = PyString_AS_STRING(obj);
767 if (len != NULL)
768 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000769 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000770 PyErr_SetString(PyExc_TypeError,
771 "expected string without null bytes");
772 return -1;
773 }
774 return 0;
775}
776
Fredrik Lundhaf722372006-05-25 17:55:31 +0000777/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000778/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000779
Eric Smitha9f7d622008-02-17 19:46:49 +0000780#include "stringlib/stringdefs.h"
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Brett Cannon01531592007-09-17 03:28:34 +0000791 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000807 char *data = op->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +0000808 Py_ssize_t size = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000809 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000810 while (size > INT_MAX) {
811 /* Very long strings cannot be written atomically.
812 * But don't write exactly INT_MAX bytes at a time
813 * to avoid memory aligment issues.
814 */
815 const int chunk_size = INT_MAX & ~0x3FFF;
816 fwrite(data, 1, chunk_size, fp);
817 data += chunk_size;
818 size -= chunk_size;
819 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000820#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000821 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000822#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000823 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000824#endif
Brett Cannon01531592007-09-17 03:28:34 +0000825 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000826 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828
Thomas Wouters7e474022000-07-16 12:04:32 +0000829 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 quote = '\'';
Christian Heimese93237d2007-12-19 02:37:44 +0000831 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
832 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833 quote = '"';
834
Christian Heimese93237d2007-12-19 02:37:44 +0000835 str_len = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000836 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000838 for (i = 0; i < str_len; i++) {
839 /* Since strings are immutable and the caller should have a
840 reference, accessing the interal buffer should not be an issue
841 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000842 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000843 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000845 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\r");
851 else if (c < ' ' || c >= 0x7f)
852 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000853 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000854 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000856 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000857 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000858 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000859}
860
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000861PyObject *
862PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000863{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimese93237d2007-12-19 02:37:44 +0000865 size_t newsize = 2 + 4 * Py_SIZE(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000866 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +0000867 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000868 PyErr_SetString(PyExc_OverflowError,
869 "string is too large to make repr");
Guido van Rossum9b847b42007-11-06 23:32:56 +0000870 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000871 }
872 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000874 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000875 }
876 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000877 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878 register char c;
879 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000880 int quote;
881
Thomas Wouters7e474022000-07-16 12:04:32 +0000882 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000883 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000884 if (smartquotes &&
Christian Heimese93237d2007-12-19 02:37:44 +0000885 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
886 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 quote = '"';
888
Tim Peters9161c8b2001-12-03 01:55:38 +0000889 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000890 *p++ = quote;
Christian Heimese93237d2007-12-19 02:37:44 +0000891 for (i = 0; i < Py_SIZE(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000892 /* There's at least enough room for a hex escape
893 and a closing quote. */
894 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000896 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000898 else if (c == '\t')
899 *p++ = '\\', *p++ = 't';
900 else if (c == '\n')
901 *p++ = '\\', *p++ = 'n';
902 else if (c == '\r')
903 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000904 else if (c < ' ' || c >= 0x7f) {
905 /* For performance, we don't want to call
906 PyOS_snprintf here (extra layers of
907 function call). */
908 sprintf(p, "\\x%02x", c & 0xff);
909 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000910 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000911 else
912 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000913 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000914 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000915 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000917 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000918 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000919 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000921}
922
Guido van Rossum189f1df2001-05-01 16:51:53 +0000923static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000924string_repr(PyObject *op)
925{
926 return PyString_Repr(op, 1);
927}
928
929static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000930string_str(PyObject *s)
931{
Tim Petersc9933152001-10-16 20:18:24 +0000932 assert(PyString_Check(s));
933 if (PyString_CheckExact(s)) {
934 Py_INCREF(s);
935 return s;
936 }
937 else {
938 /* Subtype -- return genuine string with the same value. */
939 PyStringObject *t = (PyStringObject *) s;
Christian Heimese93237d2007-12-19 02:37:44 +0000940 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Tim Petersc9933152001-10-16 20:18:24 +0000941 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000942}
943
Martin v. Löwis18e16552006-02-15 17:27:45 +0000944static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000945string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946{
Christian Heimese93237d2007-12-19 02:37:44 +0000947 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948}
949
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000950static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000951string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000952{
Andrew Dalke598710c2006-05-25 18:18:39 +0000953 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 register PyStringObject *op;
955 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000956#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000957 if (PyUnicode_Check(bb))
958 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000959#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000960 if (PyBytes_Check(bb))
961 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000962 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000963 "cannot concatenate 'str' and '%.200s' objects",
Christian Heimese93237d2007-12-19 02:37:44 +0000964 Py_TYPE(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 return NULL;
966 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 /* Optimize cases with empty left or right operand */
Christian Heimese93237d2007-12-19 02:37:44 +0000969 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000970 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimese93237d2007-12-19 02:37:44 +0000971 if (Py_SIZE(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000972 Py_INCREF(bb);
973 return bb;
974 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000975 Py_INCREF(a);
976 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977 }
Christian Heimese93237d2007-12-19 02:37:44 +0000978 size = Py_SIZE(a) + Py_SIZE(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000979 if (size < 0) {
980 PyErr_SetString(PyExc_OverflowError,
981 "strings are too large to concat");
982 return NULL;
983 }
984
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000985 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000986 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000987 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000989 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000990 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000991 op->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimese93237d2007-12-19 02:37:44 +0000992 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
993 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000994 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000995 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996#undef b
997}
998
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001000string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001002 register Py_ssize_t i;
1003 register Py_ssize_t j;
1004 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001006 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007 if (n < 0)
1008 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001009 /* watch out for overflows: the size can overflow int,
1010 * and the # of bytes needed can overflow size_t
1011 */
Christian Heimese93237d2007-12-19 02:37:44 +00001012 size = Py_SIZE(a) * n;
1013 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001014 PyErr_SetString(PyExc_OverflowError,
1015 "repeated string is too long");
1016 return NULL;
1017 }
Christian Heimese93237d2007-12-19 02:37:44 +00001018 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 Py_INCREF(a);
1020 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001021 }
Tim Peterse7c05322004-06-27 17:24:49 +00001022 nbytes = (size_t)size;
1023 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001024 PyErr_SetString(PyExc_OverflowError,
1025 "repeated string is too long");
1026 return NULL;
1027 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001028 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001029 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001030 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001031 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001032 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001033 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001034 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001035 op->ob_sval[size] = '\0';
Christian Heimese93237d2007-12-19 02:37:44 +00001036 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001037 memset(op->ob_sval, a->ob_sval[0] , n);
1038 return (PyObject *) op;
1039 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001040 i = 0;
1041 if (i < size) {
Christian Heimese93237d2007-12-19 02:37:44 +00001042 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1043 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001044 }
1045 while (i < size) {
1046 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001047 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001048 i += j;
1049 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001050 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051}
1052
1053/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1054
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001056string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001057 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001058 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059{
1060 if (i < 0)
1061 i = 0;
1062 if (j < 0)
1063 j = 0; /* Avoid signed/unsigned bug in next line */
Christian Heimese93237d2007-12-19 02:37:44 +00001064 if (j > Py_SIZE(a))
1065 j = Py_SIZE(a);
1066 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001067 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001068 Py_INCREF(a);
1069 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070 }
1071 if (j < i)
1072 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001074}
1075
Guido van Rossum9284a572000-03-07 15:53:43 +00001076static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001077string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001078{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001079 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001080#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 if (PyUnicode_Check(sub_obj))
1082 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001083#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001084 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001085 PyErr_Format(PyExc_TypeError,
1086 "'in <string>' requires string as left operand, "
Christian Heimese93237d2007-12-19 02:37:44 +00001087 "not %.200s", Py_TYPE(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001088 return -1;
1089 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001090 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001091
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001092 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001093}
1094
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001095static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001096string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001098 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +00001100 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001101 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001102 return NULL;
1103 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001104 pchar = a->ob_sval[i];
1105 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001106 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001107 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001108 else {
1109#ifdef COUNT_ALLOCS
1110 one_strings++;
1111#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001112 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001113 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001114 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115}
1116
Martin v. Löwiscd353062001-05-24 16:56:35 +00001117static PyObject*
1118string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001119{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001120 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001121 Py_ssize_t len_a, len_b;
1122 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 PyObject *result;
1124
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001125 /* Make sure both arguments are strings. */
1126 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001127 result = Py_NotImplemented;
1128 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001129 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001130 if (a == b) {
1131 switch (op) {
1132 case Py_EQ:case Py_LE:case Py_GE:
1133 result = Py_True;
1134 goto out;
1135 case Py_NE:case Py_LT:case Py_GT:
1136 result = Py_False;
1137 goto out;
1138 }
1139 }
1140 if (op == Py_EQ) {
1141 /* Supporting Py_NE here as well does not save
1142 much time, since Py_NE is rarely used. */
Christian Heimese93237d2007-12-19 02:37:44 +00001143 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001144 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimese93237d2007-12-19 02:37:44 +00001145 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001146 result = Py_True;
1147 } else {
1148 result = Py_False;
1149 }
1150 goto out;
1151 }
Christian Heimese93237d2007-12-19 02:37:44 +00001152 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001153 min_len = (len_a < len_b) ? len_a : len_b;
1154 if (min_len > 0) {
1155 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1156 if (c==0)
1157 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001158 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001159 c = 0;
1160 if (c == 0)
1161 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1162 switch (op) {
1163 case Py_LT: c = c < 0; break;
1164 case Py_LE: c = c <= 0; break;
1165 case Py_EQ: assert(0); break; /* unreachable */
1166 case Py_NE: c = c != 0; break;
1167 case Py_GT: c = c > 0; break;
1168 case Py_GE: c = c >= 0; break;
1169 default:
1170 result = Py_NotImplemented;
1171 goto out;
1172 }
1173 result = c ? Py_True : Py_False;
1174 out:
1175 Py_INCREF(result);
1176 return result;
1177}
1178
1179int
1180_PyString_Eq(PyObject *o1, PyObject *o2)
1181{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001182 PyStringObject *a = (PyStringObject*) o1;
1183 PyStringObject *b = (PyStringObject*) o2;
Christian Heimese93237d2007-12-19 02:37:44 +00001184 return Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001185 && *a->ob_sval == *b->ob_sval
Christian Heimese93237d2007-12-19 02:37:44 +00001186 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001187}
1188
Guido van Rossum9bfef441993-03-29 10:43:31 +00001189static long
Fred Drakeba096332000-07-09 07:04:36 +00001190string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001191{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001192 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001193 register unsigned char *p;
1194 register long x;
1195
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001196 if (a->ob_shash != -1)
1197 return a->ob_shash;
Christian Heimese93237d2007-12-19 02:37:44 +00001198 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001199 p = (unsigned char *) a->ob_sval;
1200 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001201 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001202 x = (1000003*x) ^ *p++;
Christian Heimese93237d2007-12-19 02:37:44 +00001203 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001204 if (x == -1)
1205 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001206 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001207 return x;
1208}
1209
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001210static PyObject*
1211string_subscript(PyStringObject* self, PyObject* item)
1212{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001213 if (PyIndex_Check(item)) {
1214 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001215 if (i == -1 && PyErr_Occurred())
1216 return NULL;
1217 if (i < 0)
1218 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001219 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 }
1221 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001222 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001223 char* source_buf;
1224 char* result_buf;
1225 PyObject* result;
1226
Tim Petersae1d0c92006-03-17 03:29:34 +00001227 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 PyString_GET_SIZE(self),
1229 &start, &stop, &step, &slicelength) < 0) {
1230 return NULL;
1231 }
1232
1233 if (slicelength <= 0) {
1234 return PyString_FromStringAndSize("", 0);
1235 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001236 else if (start == 0 && step == 1 &&
1237 slicelength == PyString_GET_SIZE(self) &&
1238 PyString_CheckExact(self)) {
1239 Py_INCREF(self);
1240 return (PyObject *)self;
1241 }
1242 else if (step == 1) {
1243 return PyString_FromStringAndSize(
1244 PyString_AS_STRING(self) + start,
1245 slicelength);
1246 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001247 else {
1248 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001249 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001250 if (result_buf == NULL)
1251 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001252
Tim Petersae1d0c92006-03-17 03:29:34 +00001253 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001254 cur += step, i++) {
1255 result_buf[i] = source_buf[cur];
1256 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001257
1258 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001259 slicelength);
1260 PyMem_Free(result_buf);
1261 return result;
1262 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001263 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001264 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001265 PyErr_Format(PyExc_TypeError,
1266 "string indices must be integers, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00001267 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001268 return NULL;
1269 }
1270}
1271
Martin v. Löwis18e16552006-02-15 17:27:45 +00001272static Py_ssize_t
1273string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274{
1275 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001276 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001277 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001278 return -1;
1279 }
1280 *ptr = (void *)self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001281 return Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001282}
1283
Martin v. Löwis18e16552006-02-15 17:27:45 +00001284static Py_ssize_t
1285string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001286{
Guido van Rossum045e6881997-09-08 18:30:11 +00001287 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001288 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001289 return -1;
1290}
1291
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292static Py_ssize_t
1293string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001294{
1295 if ( lenp )
Christian Heimese93237d2007-12-19 02:37:44 +00001296 *lenp = Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297 return 1;
1298}
1299
Martin v. Löwis18e16552006-02-15 17:27:45 +00001300static Py_ssize_t
1301string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001302{
1303 if ( index != 0 ) {
1304 PyErr_SetString(PyExc_SystemError,
1305 "accessing non-existent string segment");
1306 return -1;
1307 }
1308 *ptr = self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001309 return Py_SIZE(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001310}
1311
Christian Heimes1a6387e2008-03-26 12:49:49 +00001312static int
1313string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1314{
1315 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1316 0, flags);
1317}
1318
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001319static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001320 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001321 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001322 (ssizeargfunc)string_repeat, /*sq_repeat*/
1323 (ssizeargfunc)string_item, /*sq_item*/
1324 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001325 0, /*sq_ass_item*/
1326 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001327 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001328};
1329
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001330static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001331 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001332 (binaryfunc)string_subscript,
1333 0,
1334};
1335
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001336static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001337 (readbufferproc)string_buffer_getreadbuf,
1338 (writebufferproc)string_buffer_getwritebuf,
1339 (segcountproc)string_buffer_getsegcount,
1340 (charbufferproc)string_buffer_getcharbuf,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001341 (getbufferproc)string_buffer_getbuffer,
1342 0, /* XXX */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001343};
1344
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345
1346
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode `i': the entry above with its three-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001355
Andrew Dalke525eab32006-05-26 14:00:45 +00001356
/* True if `length' bytes of `pattern' occur in `target' at `offset'.
   The first and last bytes are compared before the interior is memcmp'd.
   Don't call if length < 2.  Arguments are parenthesized so that compound
   expressions expand correctly; each is still evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)		\
  ((target)[(offset)] == (pattern)[0] &&				\
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&		\
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1362
1363
Andrew Dalke525eab32006-05-26 14:00:45 +00001364/* Overallocate the initial list to reduce the number of reallocs for small
1365 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1366 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1367 text (roughly 11 words per line) and field delimited data (usually 1-10
1368 fields). For large strings the split algorithms are bandwidth limited
1369 so increasing the preallocation likely will not improve things.*/
1370
/* Upper bound on the number of list slots preallocated for a split. */
#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit' splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that compound
   expressions expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
	((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1376
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001377#define SPLIT_APPEND(data, left, right) \
1378 str = PyString_FromStringAndSize((data) + (left), \
1379 (right) - (left)); \
1380 if (str == NULL) \
1381 goto onError; \
1382 if (PyList_Append(list, str)) { \
1383 Py_DECREF(str); \
1384 goto onError; \
1385 } \
1386 else \
1387 Py_DECREF(str);
1388
/* Store the substring data[left:right] as element number `count` of
   `list`, then increment `count`.
   Relies on names from the expanding function's scope: `str`, `list`,
   `count` and an `onError` label (taken when allocation or append fails).
   While count < MAX_PREALLOC the string is written straight into slot
   `count` with PyList_SET_ITEM, which takes over the reference; beyond
   that it is appended with PyList_Append and the local reference is
   dropped. */
#define SPLIT_ADD(data, left, right) {                              \
    str = PyString_FromStringAndSize((data) + (left),               \
                                     (right) - (left));             \
    if (str == NULL)                                                \
        goto onError;                                               \
    if (count >= MAX_PREALLOC) {                                    \
        if (PyList_Append(list, str)) {                             \
            Py_DECREF(str);                                         \
            goto onError;                                           \
        }                                                           \
        Py_DECREF(str);                                             \
    }                                                               \
    else {                                                          \
        PyList_SET_ITEM(list, count, str);                          \
    }                                                               \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001405
/* Always force the list to the expected size: assigns `count` (a variable
   in the expanding function's scope) to Py_SIZE(list). */
#define FIX_PREALLOC_SIZE(list) \
    Py_SIZE(list) = count
Andrew Dalke525eab32006-05-26 14:00:45 +00001408
/* Cursor-advancing helpers for whitespace splitting.  `i` must be a
   modifiable index variable; it is updated in place.  Characters are passed
   through Py_CHARMASK before being handed to isspace().  `s` and `len` are
   parenthesized so that expression arguments expand correctly.

   SKIP_SPACE / SKIP_NONSPACE move i forward while i < len and s[i] is
   (respectively is not) whitespace.
   RSKIP_SPACE / RSKIP_NONSPACE move i backward while i >= 0 and s[i] is
   (respectively is not) whitespace; i ends at -1 if the start is reached. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1413
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001414Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001415split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001416{
Skip Montanaro26015492007-12-08 15:33:24 +00001417 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001418 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001419 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001420 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421
1422 if (list == NULL)
1423 return NULL;
1424
Andrew Dalke02758d62006-05-26 15:21:01 +00001425 i = j = 0;
1426
1427 while (maxsplit-- > 0) {
1428 SKIP_SPACE(s, i, len);
1429 if (i==len) break;
1430 j = i; i++;
1431 SKIP_NONSPACE(s, i, len);
Skip Montanaro26015492007-12-08 15:33:24 +00001432 if (j == 0 && i == len && PyString_CheckExact(self)) {
1433 /* No whitespace in self, so just use it as list[0] */
1434 Py_INCREF(self);
1435 PyList_SET_ITEM(list, 0, (PyObject *)self);
1436 count++;
1437 break;
1438 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001439 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001441
1442 if (i < len) {
1443 /* Only occurs when maxsplit was reached */
1444 /* Skip any remaining whitespace and copy to end of string */
1445 SKIP_SPACE(s, i, len);
1446 if (i != len)
1447 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001448 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001449 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001451 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452 Py_DECREF(list);
1453 return NULL;
1454}
1455
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001456Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001457split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001458{
Skip Montanaro26015492007-12-08 15:33:24 +00001459 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001460 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001461 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463
1464 if (list == NULL)
1465 return NULL;
1466
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001467 i = j = 0;
1468 while ((j < len) && (maxcount-- > 0)) {
1469 for(; j<len; j++) {
1470 /* I found that using memchr makes no difference */
1471 if (s[j] == ch) {
1472 SPLIT_ADD(s, i, j);
1473 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001474 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001475 }
1476 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001477 }
Skip Montanaro26015492007-12-08 15:33:24 +00001478 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1479 /* ch not in self, so just use self as list[0] */
1480 Py_INCREF(self);
1481 PyList_SET_ITEM(list, 0, (PyObject *)self);
1482 count++;
1483 }
1484 else if (i <= len) {
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001485 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001486 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001487 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001488 return list;
1489
1490 onError:
1491 Py_DECREF(list);
1492 return NULL;
1493}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001495PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496"S.split([sep [,maxsplit]]) -> list of strings\n\
1497\n\
1498Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001500splits are done. If sep is not specified or is None, any\n\
Georg Brandldfb77db2008-05-11 09:11:40 +00001501whitespace string is a separator and leading and trailing\n\
1502whitespace is stripped before splitting.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503
1504static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001505string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001507 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001508 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001509 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001510 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001511#ifdef USE_FAST
1512 Py_ssize_t pos;
1513#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514
Martin v. Löwis9c830762006-04-13 08:37:17 +00001515 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001517 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001518 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001519 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001520 return split_whitespace(self, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001521 if (PyString_Check(subobj)) {
1522 sub = PyString_AS_STRING(subobj);
1523 n = PyString_GET_SIZE(subobj);
1524 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001525#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001526 else if (PyUnicode_Check(subobj))
1527 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001528#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001529 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1530 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001531
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532 if (n == 0) {
1533 PyErr_SetString(PyExc_ValueError, "empty separator");
1534 return NULL;
1535 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001536 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001537 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001538
Andrew Dalke525eab32006-05-26 14:00:45 +00001539 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001540 if (list == NULL)
1541 return NULL;
1542
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001543#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001544 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001545 while (maxsplit-- > 0) {
1546 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1547 if (pos < 0)
1548 break;
1549 j = i+pos;
1550 SPLIT_ADD(s, i, j);
1551 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001553#else
1554 i = j = 0;
1555 while ((j+n <= len) && (maxsplit-- > 0)) {
1556 for (; j+n <= len; j++) {
1557 if (Py_STRING_MATCH(s, j, sub, n)) {
1558 SPLIT_ADD(s, i, j);
1559 i = j = j + n;
1560 break;
1561 }
1562 }
1563 }
1564#endif
1565 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001566 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001567 return list;
1568
Andrew Dalke525eab32006-05-26 14:00:45 +00001569 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570 Py_DECREF(list);
1571 return NULL;
1572}
1573
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001574PyDoc_STRVAR(partition__doc__,
1575"S.partition(sep) -> (head, sep, tail)\n\
1576\n\
1577Searches for the separator sep in S, and returns the part before it,\n\
1578the separator itself, and the part after it. If the separator is not\n\
1579found, returns S and two empty strings.");
1580
1581static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001582string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001583{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001584 const char *sep;
1585 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001586
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001587 if (PyString_Check(sep_obj)) {
1588 sep = PyString_AS_STRING(sep_obj);
1589 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001590 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001591#ifdef Py_USING_UNICODE
1592 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001593 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001594#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001595 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001596 return NULL;
1597
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001598 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001599 (PyObject*) self,
1600 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1601 sep_obj, sep, sep_len
1602 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001603}
1604
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001605PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001606"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001607\n\
1608Searches for the separator sep in S, starting at the end of S, and returns\n\
1609the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001610separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001611
1612static PyObject *
1613string_rpartition(PyStringObject *self, PyObject *sep_obj)
1614{
1615 const char *sep;
1616 Py_ssize_t sep_len;
1617
1618 if (PyString_Check(sep_obj)) {
1619 sep = PyString_AS_STRING(sep_obj);
1620 sep_len = PyString_GET_SIZE(sep_obj);
1621 }
1622#ifdef Py_USING_UNICODE
1623 else if (PyUnicode_Check(sep_obj))
1624 return PyUnicode_Partition((PyObject *) self, sep_obj);
1625#endif
1626 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1627 return NULL;
1628
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001629 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001630 (PyObject*) self,
1631 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1632 sep_obj, sep, sep_len
1633 );
1634}
1635
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001636Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001637rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001638{
Skip Montanaro26015492007-12-08 15:33:24 +00001639 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001640 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001641 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001642 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001643
1644 if (list == NULL)
1645 return NULL;
1646
Andrew Dalke02758d62006-05-26 15:21:01 +00001647 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001648
Andrew Dalke02758d62006-05-26 15:21:01 +00001649 while (maxsplit-- > 0) {
1650 RSKIP_SPACE(s, i);
1651 if (i<0) break;
1652 j = i; i--;
1653 RSKIP_NONSPACE(s, i);
Skip Montanaro26015492007-12-08 15:33:24 +00001654 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1655 /* No whitespace in self, so just use it as list[0] */
1656 Py_INCREF(self);
1657 PyList_SET_ITEM(list, 0, (PyObject *)self);
1658 count++;
1659 break;
1660 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001661 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001662 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001663 if (i >= 0) {
1664 /* Only occurs when maxsplit was reached */
1665 /* Skip any remaining whitespace and copy to beginning of string */
1666 RSKIP_SPACE(s, i);
1667 if (i >= 0)
1668 SPLIT_ADD(s, 0, i + 1);
1669
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001670 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001671 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001672 if (PyList_Reverse(list) < 0)
1673 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001675 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676 Py_DECREF(list);
1677 return NULL;
1678}
1679
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001680Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001681rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001682{
Skip Montanaro26015492007-12-08 15:33:24 +00001683 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001684 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001685 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001686 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001687
1688 if (list == NULL)
1689 return NULL;
1690
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001691 i = j = len - 1;
1692 while ((i >= 0) && (maxcount-- > 0)) {
1693 for (; i >= 0; i--) {
1694 if (s[i] == ch) {
1695 SPLIT_ADD(s, i + 1, j + 1);
1696 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001697 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001698 }
1699 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001700 }
Skip Montanaro26015492007-12-08 15:33:24 +00001701 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1702 /* ch not in self, so just use self as list[0] */
1703 Py_INCREF(self);
1704 PyList_SET_ITEM(list, 0, (PyObject *)self);
1705 count++;
1706 }
1707 else if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001708 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001709 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001710 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001711 if (PyList_Reverse(list) < 0)
1712 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001713 return list;
1714
1715 onError:
1716 Py_DECREF(list);
1717 return NULL;
1718}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001719
1720PyDoc_STRVAR(rsplit__doc__,
1721"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1722\n\
1723Return a list of the words in the string S, using sep as the\n\
1724delimiter string, starting at the end of the string and working\n\
1725to the front. If maxsplit is given, at most maxsplit splits are\n\
1726done. If sep is not specified or is None, any whitespace string\n\
1727is a separator.");
1728
1729static PyObject *
1730string_rsplit(PyStringObject *self, PyObject *args)
1731{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001732 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001733 Py_ssize_t maxsplit = -1, count=0;
Skip Montanaro26015492007-12-08 15:33:24 +00001734 const char *s, *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001735 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001736
Martin v. Löwis9c830762006-04-13 08:37:17 +00001737 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001738 return NULL;
1739 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001740 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001741 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001742 return rsplit_whitespace(self, len, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001743 if (PyString_Check(subobj)) {
1744 sub = PyString_AS_STRING(subobj);
1745 n = PyString_GET_SIZE(subobj);
1746 }
1747#ifdef Py_USING_UNICODE
1748 else if (PyUnicode_Check(subobj))
1749 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1750#endif
1751 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1752 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001753
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001754 if (n == 0) {
1755 PyErr_SetString(PyExc_ValueError, "empty separator");
1756 return NULL;
1757 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001758 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001759 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001760
Andrew Dalke525eab32006-05-26 14:00:45 +00001761 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001762 if (list == NULL)
1763 return NULL;
1764
1765 j = len;
1766 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001767
Skip Montanaro26015492007-12-08 15:33:24 +00001768 s = PyString_AS_STRING(self);
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001769 while ( (i >= 0) && (maxsplit-- > 0) ) {
1770 for (; i>=0; i--) {
1771 if (Py_STRING_MATCH(s, i, sub, n)) {
1772 SPLIT_ADD(s, i + n, j);
1773 j = i;
1774 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001775 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001776 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001777 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001778 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001779 SPLIT_ADD(s, 0, j);
1780 FIX_PREALLOC_SIZE(list);
1781 if (PyList_Reverse(list) < 0)
1782 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001783 return list;
1784
Andrew Dalke525eab32006-05-26 14:00:45 +00001785onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001786 Py_DECREF(list);
1787 return NULL;
1788}
1789
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001791PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792"S.join(sequence) -> string\n\
1793\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001794Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796
1797static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001798string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799{
1800 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001801 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001804 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001805 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001806 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001807 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808
Tim Peters19fe14e2001-01-19 03:03:47 +00001809 seq = PySequence_Fast(orig, "");
1810 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001811 return NULL;
1812 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001813
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001814 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001815 if (seqlen == 0) {
1816 Py_DECREF(seq);
1817 return PyString_FromString("");
1818 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001820 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001821 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1822 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001823 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001824 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001825 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001827
Raymond Hettinger674f2412004-08-23 23:23:54 +00001828 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001829 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001830 * Do a pre-pass to figure out the total amount of space we'll
1831 * need (sz), see whether any argument is absurd, and defer to
1832 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001833 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001834 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001835 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001836 item = PySequence_Fast_GET_ITEM(seq, i);
1837 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001838#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001839 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001840 /* Defer to Unicode join.
1841 * CAUTION: There's no gurantee that the
1842 * original sequence can be iterated over
1843 * again, so we must pass seq here.
1844 */
1845 PyObject *result;
1846 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001847 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001848 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001849 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001850#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001851 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001852 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001853 " %.80s found",
Christian Heimese93237d2007-12-19 02:37:44 +00001854 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001855 Py_DECREF(seq);
1856 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001857 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001858 sz += PyString_GET_SIZE(item);
1859 if (i != 0)
1860 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001861 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001862 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001863 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001864 Py_DECREF(seq);
1865 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001867 }
1868
1869 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001870 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001871 if (res == NULL) {
1872 Py_DECREF(seq);
1873 return NULL;
1874 }
1875
1876 /* Catenate everything. */
1877 p = PyString_AS_STRING(res);
1878 for (i = 0; i < seqlen; ++i) {
1879 size_t n;
1880 item = PySequence_Fast_GET_ITEM(seq, i);
1881 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001882 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001883 p += n;
1884 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001885 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001886 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001887 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001888 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001889
Jeremy Hylton49048292000-07-11 03:28:17 +00001890 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892}
1893
Tim Peters52e155e2001-06-16 05:42:57 +00001894PyObject *
1895_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001896{
Tim Petersa7259592001-06-16 05:11:17 +00001897 assert(sep != NULL && PyString_Check(sep));
1898 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001899 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001900}
1901
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001902Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001903string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001904{
1905 if (*end > len)
1906 *end = len;
1907 else if (*end < 0)
1908 *end += len;
1909 if (*end < 0)
1910 *end = 0;
1911 if (*start < 0)
1912 *start += len;
1913 if (*start < 0)
1914 *start = 0;
1915}
1916
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001917Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001918string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001920 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001921 const char *sub;
1922 Py_ssize_t sub_len;
1923 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Facundo Batista57d56692007-11-16 18:04:14 +00001924 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925
Facundo Batista57d56692007-11-16 18:04:14 +00001926 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1927 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001928 return -2;
Facundo Batista57d56692007-11-16 18:04:14 +00001929 /* To support None in "start" and "end" arguments, meaning
1930 the same as if they were not passed.
1931 */
1932 if (obj_start != Py_None)
1933 if (!_PyEval_SliceIndex(obj_start, &start))
1934 return -2;
1935 if (obj_end != Py_None)
1936 if (!_PyEval_SliceIndex(obj_end, &end))
1937 return -2;
1938
Guido van Rossum4c08d552000-03-10 22:55:18 +00001939 if (PyString_Check(subobj)) {
1940 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001941 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001942 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001943#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001944 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001945 return PyUnicode_Find(
1946 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001947#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001948 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001949 /* XXX - the "expected a character buffer object" is pretty
1950 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951 return -2;
1952
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001953 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001954 return stringlib_find_slice(
1955 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1956 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001957 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001958 return stringlib_rfind_slice(
1959 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1960 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961}
1962
1963
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001964PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965"S.find(sub [,start [,end]]) -> int\n\
1966\n\
1967Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001968such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969arguments start and end are interpreted as in slice notation.\n\
1970\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001971Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972
1973static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001974string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001976 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977 if (result == -2)
1978 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001979 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980}
1981
1982
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001983PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984"S.index(sub [,start [,end]]) -> int\n\
1985\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001986Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987
1988static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001989string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001991 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992 if (result == -2)
1993 return NULL;
1994 if (result == -1) {
1995 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001996 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997 return NULL;
1998 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001999 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000}
2001
2002
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002003PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004"S.rfind(sub [,start [,end]]) -> int\n\
2005\n\
2006Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00002007such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008arguments start and end are interpreted as in slice notation.\n\
2009\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002010Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011
2012static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002013string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002015 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016 if (result == -2)
2017 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002018 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019}
2020
2021
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002022PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023"S.rindex(sub [,start [,end]]) -> int\n\
2024\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002025Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026
2027static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002028string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002030 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031 if (result == -2)
2032 return NULL;
2033 if (result == -1) {
2034 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002035 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036 return NULL;
2037 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002038 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039}
2040
2041
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002042Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002043do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2044{
2045 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002046 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002047 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002048 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2049 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002050
2051 i = 0;
2052 if (striptype != RIGHTSTRIP) {
2053 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2054 i++;
2055 }
2056 }
2057
2058 j = len;
2059 if (striptype != LEFTSTRIP) {
2060 do {
2061 j--;
2062 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2063 j++;
2064 }
2065
2066 if (i == 0 && j == len && PyString_CheckExact(self)) {
2067 Py_INCREF(self);
2068 return (PyObject*)self;
2069 }
2070 else
2071 return PyString_FromStringAndSize(s+i, j-i);
2072}
2073
2074
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002075Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002076do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077{
2078 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002079 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081 i = 0;
2082 if (striptype != RIGHTSTRIP) {
2083 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2084 i++;
2085 }
2086 }
2087
2088 j = len;
2089 if (striptype != LEFTSTRIP) {
2090 do {
2091 j--;
2092 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2093 j++;
2094 }
2095
Tim Peters8fa5dd02001-09-12 02:18:30 +00002096 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097 Py_INCREF(self);
2098 return (PyObject*)self;
2099 }
2100 else
2101 return PyString_FromStringAndSize(s+i, j-i);
2102}
2103
2104
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002105Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002106do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2107{
2108 PyObject *sep = NULL;
2109
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002110 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002111 return NULL;
2112
2113 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002114 if (PyString_Check(sep))
2115 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002116#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002117 else if (PyUnicode_Check(sep)) {
2118 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2119 PyObject *res;
2120 if (uniself==NULL)
2121 return NULL;
2122 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2123 striptype, sep);
2124 Py_DECREF(uniself);
2125 return res;
2126 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002127#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002128 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002129#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002130 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002131#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002132 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002133#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002134 STRIPNAME(striptype));
2135 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002136 }
2137
2138 return do_strip(self, striptype);
2139}
2140
2141
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002142PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002143"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002144\n\
2145Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002146whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002147If chars is given and not None, remove characters in chars instead.\n\
2148If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149
2150static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002151string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002153 if (PyTuple_GET_SIZE(args) == 0)
2154 return do_strip(self, BOTHSTRIP); /* Common case */
2155 else
2156 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157}
2158
2159
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002160PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002161"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002162\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002163Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002164If chars is given and not None, remove characters in chars instead.\n\
2165If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166
2167static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002168string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002170 if (PyTuple_GET_SIZE(args) == 0)
2171 return do_strip(self, LEFTSTRIP); /* Common case */
2172 else
2173 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174}
2175
2176
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002177PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002178"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002180Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002181If chars is given and not None, remove characters in chars instead.\n\
2182If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183
2184static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002185string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002187 if (PyTuple_GET_SIZE(args) == 0)
2188 return do_strip(self, RIGHTSTRIP); /* Common case */
2189 else
2190 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191}
2192
2193
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002194PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002195"S.lower() -> string\n\
2196\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002197Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002198
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002199/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2200#ifndef _tolower
2201#define _tolower tolower
2202#endif
2203
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002205string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002206{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002207 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002208 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002209 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002211 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002212 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002214
2215 s = PyString_AS_STRING(newobj);
2216
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002217 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002218
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002220 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002221 if (isupper(c))
2222 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002224
Anthony Baxtera6286212006-04-11 07:42:36 +00002225 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226}
2227
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002228PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229"S.upper() -> string\n\
2230\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002231Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002233#ifndef _toupper
2234#define _toupper toupper
2235#endif
2236
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002238string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002240 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002241 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002242 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002244 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002245 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002247
2248 s = PyString_AS_STRING(newobj);
2249
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002250 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002251
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002253 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002254 if (islower(c))
2255 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002257
Anthony Baxtera6286212006-04-11 07:42:36 +00002258 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259}
2260
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002261PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262"S.title() -> string\n\
2263\n\
2264Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002265characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002266
2267static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002268string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269{
2270 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002271 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002273 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274
Anthony Baxtera6286212006-04-11 07:42:36 +00002275 newobj = PyString_FromStringAndSize(NULL, n);
2276 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002277 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002278 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002279 for (i = 0; i < n; i++) {
2280 int c = Py_CHARMASK(*s++);
2281 if (islower(c)) {
2282 if (!previous_is_cased)
2283 c = toupper(c);
2284 previous_is_cased = 1;
2285 } else if (isupper(c)) {
2286 if (previous_is_cased)
2287 c = tolower(c);
2288 previous_is_cased = 1;
2289 } else
2290 previous_is_cased = 0;
2291 *s_new++ = c;
2292 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002293 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294}
2295
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002296PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002297"S.capitalize() -> string\n\
2298\n\
2299Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002300capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301
2302static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002303string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304{
2305 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002306 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002307 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308
Anthony Baxtera6286212006-04-11 07:42:36 +00002309 newobj = PyString_FromStringAndSize(NULL, n);
2310 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002311 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002312 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002313 if (0 < n) {
2314 int c = Py_CHARMASK(*s++);
2315 if (islower(c))
2316 *s_new = toupper(c);
2317 else
2318 *s_new = c;
2319 s_new++;
2320 }
2321 for (i = 1; i < n; i++) {
2322 int c = Py_CHARMASK(*s++);
2323 if (isupper(c))
2324 *s_new = tolower(c);
2325 else
2326 *s_new = c;
2327 s_new++;
2328 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002329 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330}
2331
2332
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002333PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334"S.count(sub[, start[, end]]) -> int\n\
2335\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002336Return the number of non-overlapping occurrences of substring sub in\n\
2337string S[start:end]. Optional arguments start and end are interpreted\n\
2338as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339
2340static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002341string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002343 PyObject *sub_obj;
2344 const char *str = PyString_AS_STRING(self), *sub;
2345 Py_ssize_t sub_len;
2346 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002348 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2349 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002351
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002352 if (PyString_Check(sub_obj)) {
2353 sub = PyString_AS_STRING(sub_obj);
2354 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002355 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002356#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002357 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002358 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002359 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002360 if (count == -1)
2361 return NULL;
2362 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002363 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002364 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002365#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002366 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 return NULL;
2368
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002369 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002370
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002371 return PyInt_FromSsize_t(
2372 stringlib_count(str + start, end - start, sub, sub_len)
2373 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374}
2375
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002376PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377"S.swapcase() -> string\n\
2378\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002380converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002381
2382static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002383string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002384{
2385 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002386 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002387 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002388
Anthony Baxtera6286212006-04-11 07:42:36 +00002389 newobj = PyString_FromStringAndSize(NULL, n);
2390 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002391 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002392 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002393 for (i = 0; i < n; i++) {
2394 int c = Py_CHARMASK(*s++);
2395 if (islower(c)) {
2396 *s_new = toupper(c);
2397 }
2398 else if (isupper(c)) {
2399 *s_new = tolower(c);
2400 }
2401 else
2402 *s_new = c;
2403 s_new++;
2404 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002405 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002406}
2407
2408
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002409PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410"S.translate(table [,deletechars]) -> string\n\
2411\n\
2412Return a copy of the string S, where all characters occurring\n\
2413in the optional argument deletechars are removed, and the\n\
2414remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002415translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416
2417static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002418string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002420 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002421 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002422 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002423 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002424 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002425 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426 PyObject *result;
2427 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002430 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002431 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002432 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433
2434 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002435 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436 tablen = PyString_GET_SIZE(tableobj);
2437 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002438 else if (tableobj == Py_None) {
2439 table = NULL;
2440 tablen = 256;
2441 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002442#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002443 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002444 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002445 parameter; instead a mapping to None will cause characters
2446 to be deleted. */
2447 if (delobj != NULL) {
2448 PyErr_SetString(PyExc_TypeError,
2449 "deletions are implemented differently for unicode");
2450 return NULL;
2451 }
2452 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2453 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002454#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002455 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002456 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002457
Martin v. Löwis00b61272002-12-12 20:03:19 +00002458 if (tablen != 256) {
2459 PyErr_SetString(PyExc_ValueError,
2460 "translation table must be 256 characters long");
2461 return NULL;
2462 }
2463
Guido van Rossum4c08d552000-03-10 22:55:18 +00002464 if (delobj != NULL) {
2465 if (PyString_Check(delobj)) {
2466 del_table = PyString_AS_STRING(delobj);
2467 dellen = PyString_GET_SIZE(delobj);
2468 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002469#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002470 else if (PyUnicode_Check(delobj)) {
2471 PyErr_SetString(PyExc_TypeError,
2472 "deletions are implemented differently for unicode");
2473 return NULL;
2474 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002475#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002476 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2477 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002478 }
2479 else {
2480 del_table = NULL;
2481 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482 }
2483
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002484 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002485 result = PyString_FromStringAndSize((char *)NULL, inlen);
2486 if (result == NULL)
2487 return NULL;
2488 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002489 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002490
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002491 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492 /* If no deletions are required, use faster code */
2493 for (i = inlen; --i >= 0; ) {
2494 c = Py_CHARMASK(*input++);
2495 if (Py_CHARMASK((*output++ = table[c])) != c)
2496 changed = 1;
2497 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002498 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002499 return result;
2500 Py_DECREF(result);
2501 Py_INCREF(input_obj);
2502 return input_obj;
2503 }
2504
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002505 if (table == NULL) {
2506 for (i = 0; i < 256; i++)
2507 trans_table[i] = Py_CHARMASK(i);
2508 } else {
2509 for (i = 0; i < 256; i++)
2510 trans_table[i] = Py_CHARMASK(table[i]);
2511 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002512
2513 for (i = 0; i < dellen; i++)
2514 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2515
2516 for (i = inlen; --i >= 0; ) {
2517 c = Py_CHARMASK(*input++);
2518 if (trans_table[c] != -1)
2519 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2520 continue;
2521 changed = 1;
2522 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002523 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002524 Py_DECREF(result);
2525 Py_INCREF(input_obj);
2526 return input_obj;
2527 }
2528 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002529 if (inlen > 0)
2530 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002531 return result;
2532}
2533
2534
/* Direction constants: positive for forward, negative for reverse
   (presumably passed as the `direction' argument of the search
   helpers -- confirm at the call sites). */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* findchar: memchr() over the first target_len bytes of target, with
   the result cast to char *; yields NULL when c does not occur. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), c, target_len))
2542
2543/* String ops must return a string. */
2544/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002545Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002546return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002547{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002548 if (PyString_CheckExact(self)) {
2549 Py_INCREF(self);
2550 return self;
2551 }
2552 return (PyStringObject *)PyString_FromStringAndSize(
2553 PyString_AS_STRING(self),
2554 PyString_GET_SIZE(self));
2555}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002556
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002557Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002558countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002559{
2560 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002561 const char *start=target;
2562 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002564 while ( (start=findchar(start, end-start, c)) != NULL ) {
2565 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002566 if (count >= maxcount)
2567 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002568 start += 1;
2569 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002570 return count;
2571}
2572
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002573Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002574findstring(const char *target, Py_ssize_t target_len,
2575 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002576 Py_ssize_t start,
2577 Py_ssize_t end,
2578 int direction)
2579{
2580 if (start < 0) {
2581 start += target_len;
2582 if (start < 0)
2583 start = 0;
2584 }
2585 if (end > target_len) {
2586 end = target_len;
2587 } else if (end < 0) {
2588 end += target_len;
2589 if (end < 0)
2590 end = 0;
2591 }
2592
2593 /* zero-length substrings always match at the first attempt */
2594 if (pattern_len == 0)
2595 return (direction > 0) ? start : end;
2596
2597 end -= pattern_len;
2598
2599 if (direction < 0) {
2600 for (; end >= start; end--)
2601 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2602 return end;
2603 } else {
2604 for (; start <= end; start++)
2605 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2606 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002607 }
2608 return -1;
2609}
2610
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002611Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002612countstring(const char *target, Py_ssize_t target_len,
2613 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002614 Py_ssize_t start,
2615 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002616 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002617{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002618 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002619
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002620 if (start < 0) {
2621 start += target_len;
2622 if (start < 0)
2623 start = 0;
2624 }
2625 if (end > target_len) {
2626 end = target_len;
2627 } else if (end < 0) {
2628 end += target_len;
2629 if (end < 0)
2630 end = 0;
2631 }
2632
2633 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002634 if (pattern_len == 0 || maxcount == 0) {
2635 if (target_len+1 < maxcount)
2636 return target_len+1;
2637 return maxcount;
2638 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002639
2640 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002641 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002642 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002643 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2644 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002645 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002646 end -= pattern_len-1;
2647 }
2648 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002649 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002650 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2651 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002652 if (--maxcount <= 0)
2653 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002654 start += pattern_len-1;
2655 }
2656 }
2657 return count;
2658}
2659
2660
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002661/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002662
2663/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002664Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002665replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002666 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002667 Py_ssize_t maxcount)
2668{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002669 char *self_s, *result_s;
2670 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002671 Py_ssize_t count, i, product;
2672 PyStringObject *result;
2673
2674 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002675
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002676 /* 1 at the end plus 1 after every character */
2677 count = self_len+1;
2678 if (maxcount < count)
2679 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002680
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002681 /* Check for overflow */
2682 /* result_len = count * to_len + self_len; */
2683 product = count * to_len;
2684 if (product / to_len != count) {
2685 PyErr_SetString(PyExc_OverflowError,
2686 "replace string is too long");
2687 return NULL;
2688 }
2689 result_len = product + self_len;
2690 if (result_len < 0) {
2691 PyErr_SetString(PyExc_OverflowError,
2692 "replace string is too long");
2693 return NULL;
2694 }
2695
2696 if (! (result = (PyStringObject *)
2697 PyString_FromStringAndSize(NULL, result_len)) )
2698 return NULL;
2699
2700 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002701 result_s = PyString_AS_STRING(result);
2702
2703 /* TODO: special case single character, which doesn't need memcpy */
2704
2705 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002706 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002707 result_s += to_len;
2708 count -= 1;
2709
2710 for (i=0; i<count; i++) {
2711 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002712 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002713 result_s += to_len;
2714 }
2715
2716 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002717 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002718
2719 return result;
2720}
2721
2722/* Special case for deleting a single character */
2723/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002724Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002725replace_delete_single_character(PyStringObject *self,
2726 char from_c, Py_ssize_t maxcount)
2727{
2728 char *self_s, *result_s;
2729 char *start, *next, *end;
2730 Py_ssize_t self_len, result_len;
2731 Py_ssize_t count;
2732 PyStringObject *result;
2733
2734 self_len = PyString_GET_SIZE(self);
2735 self_s = PyString_AS_STRING(self);
2736
Andrew Dalke51324072006-05-26 20:25:22 +00002737 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002738 if (count == 0) {
2739 return return_self(self);
2740 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002741
2742 result_len = self_len - count; /* from_len == 1 */
2743 assert(result_len>=0);
2744
2745 if ( (result = (PyStringObject *)
2746 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2747 return NULL;
2748 result_s = PyString_AS_STRING(result);
2749
2750 start = self_s;
2751 end = self_s + self_len;
2752 while (count-- > 0) {
2753 next = findchar(start, end-start, from_c);
2754 if (next == NULL)
2755 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002756 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002757 result_s += (next-start);
2758 start = next+1;
2759 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002760 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002761
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002762 return result;
2763}
2764
2765/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2766
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002767Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002768replace_delete_substring(PyStringObject *self,
2769 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002770 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002771 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002772 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002773 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002774 Py_ssize_t count, offset;
2775 PyStringObject *result;
2776
2777 self_len = PyString_GET_SIZE(self);
2778 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002779
2780 count = countstring(self_s, self_len,
2781 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002782 0, self_len, 1,
2783 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002784
2785 if (count == 0) {
2786 /* no matches */
2787 return return_self(self);
2788 }
2789
2790 result_len = self_len - (count * from_len);
2791 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002792
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002793 if ( (result = (PyStringObject *)
2794 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2795 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002796
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002797 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002798
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002799 start = self_s;
2800 end = self_s + self_len;
2801 while (count-- > 0) {
2802 offset = findstring(start, end-start,
2803 from_s, from_len,
2804 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002805 if (offset == -1)
2806 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002807 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002808
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002809 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002810
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002811 result_s += (next-start);
2812 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002813 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002814 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002815 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002816}
2817
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002818/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002819Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002820replace_single_character_in_place(PyStringObject *self,
2821 char from_c, char to_c,
2822 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002823{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002824 char *self_s, *result_s, *start, *end, *next;
2825 Py_ssize_t self_len;
2826 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002827
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002828 /* The result string will be the same size */
2829 self_s = PyString_AS_STRING(self);
2830 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002831
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002832 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002833
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002834 if (next == NULL) {
2835 /* No matches; return the original string */
2836 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002837 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002838
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002839 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002840 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002841 if (result == NULL)
2842 return NULL;
2843 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002844 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002845
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002846 /* change everything in-place, starting with this one */
2847 start = result_s + (next-self_s);
2848 *start = to_c;
2849 start++;
2850 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002851
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002852 while (--maxcount > 0) {
2853 next = findchar(start, end-start, from_c);
2854 if (next == NULL)
2855 break;
2856 *next = to_c;
2857 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002858 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002859
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002860 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002861}
2862
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002863/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002864Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002865replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002866 const char *from_s, Py_ssize_t from_len,
2867 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002868 Py_ssize_t maxcount)
2869{
2870 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002871 char *self_s;
2872 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002873 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002874
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002875 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002876
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002877 self_s = PyString_AS_STRING(self);
2878 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002879
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002880 offset = findstring(self_s, self_len,
2881 from_s, from_len,
2882 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002883 if (offset == -1) {
2884 /* No matches; return the original string */
2885 return return_self(self);
2886 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002887
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002888 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002889 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002890 if (result == NULL)
2891 return NULL;
2892 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002893 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002894
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002895 /* change everything in-place, starting with this one */
2896 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002897 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898 start += from_len;
2899 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002900
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002901 while ( --maxcount > 0) {
2902 offset = findstring(start, end-start,
2903 from_s, from_len,
2904 0, end-start, FORWARD);
2905 if (offset==-1)
2906 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002907 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908 start += offset+from_len;
2909 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002910
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002911 return result;
2912}
2913
2914/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002915Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002916replace_single_character(PyStringObject *self,
2917 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002918 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002919 Py_ssize_t maxcount)
2920{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002921 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002922 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002923 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002924 Py_ssize_t count, product;
2925 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002926
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002927 self_s = PyString_AS_STRING(self);
2928 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002929
Andrew Dalke51324072006-05-26 20:25:22 +00002930 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002931 if (count == 0) {
2932 /* no matches, return unchanged */
2933 return return_self(self);
2934 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002935
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002936 /* use the difference between current and new, hence the "-1" */
2937 /* result_len = self_len + count * (to_len-1) */
2938 product = count * (to_len-1);
2939 if (product / (to_len-1) != count) {
2940 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2941 return NULL;
2942 }
2943 result_len = self_len + product;
2944 if (result_len < 0) {
2945 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2946 return NULL;
2947 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002948
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002949 if ( (result = (PyStringObject *)
2950 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2951 return NULL;
2952 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002953
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002954 start = self_s;
2955 end = self_s + self_len;
2956 while (count-- > 0) {
2957 next = findchar(start, end-start, from_c);
2958 if (next == NULL)
2959 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002960
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002961 if (next == start) {
2962 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002963 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002964 result_s += to_len;
2965 start += 1;
2966 } else {
2967 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002968 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002969 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002970 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002971 result_s += to_len;
2972 start = next+1;
2973 }
2974 }
2975 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002976 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002977
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978 return result;
2979}
2980
2981/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002982Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002983replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002984 const char *from_s, Py_ssize_t from_len,
2985 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002986 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002987 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002988 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002989 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002990 Py_ssize_t count, offset, product;
2991 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002992
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002993 self_s = PyString_AS_STRING(self);
2994 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002995
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002996 count = countstring(self_s, self_len,
2997 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002998 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002999 if (count == 0) {
3000 /* no matches, return unchanged */
3001 return return_self(self);
3002 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003003
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003004 /* Check for overflow */
3005 /* result_len = self_len + count * (to_len-from_len) */
3006 product = count * (to_len-from_len);
3007 if (product / (to_len-from_len) != count) {
3008 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3009 return NULL;
3010 }
3011 result_len = self_len + product;
3012 if (result_len < 0) {
3013 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3014 return NULL;
3015 }
Neal Norwitza7edb112006-07-30 06:59:13 +00003016
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003017 if ( (result = (PyStringObject *)
3018 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3019 return NULL;
3020 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00003021
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003022 start = self_s;
3023 end = self_s + self_len;
3024 while (count-- > 0) {
3025 offset = findstring(start, end-start,
3026 from_s, from_len,
3027 0, end-start, FORWARD);
3028 if (offset == -1)
3029 break;
3030 next = start+offset;
3031 if (next == start) {
3032 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003033 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003034 result_s += to_len;
3035 start += from_len;
3036 } else {
3037 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003038 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003039 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003040 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003041 result_s += to_len;
3042 start = next+from_len;
3043 }
3044 }
3045 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003046 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00003047
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003048 return result;
3049}
3050
3051
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003052Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003053replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003054 const char *from_s, Py_ssize_t from_len,
3055 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003056 Py_ssize_t maxcount)
3057{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003058 if (maxcount < 0) {
3059 maxcount = PY_SSIZE_T_MAX;
3060 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3061 /* nothing to do; return the original string */
3062 return return_self(self);
3063 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003064
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003065 if (maxcount == 0 ||
3066 (from_len == 0 && to_len == 0)) {
3067 /* nothing to do; return the original string */
3068 return return_self(self);
3069 }
3070
3071 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003072
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003073 if (from_len == 0) {
3074 /* insert the 'to' string everywhere. */
3075 /* >>> "Python".replace("", ".") */
3076 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003077 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003078 }
3079
3080 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3081 /* point for an empty self string to generate a non-empty string */
3082 /* Special case so the remaining code always gets a non-empty string */
3083 if (PyString_GET_SIZE(self) == 0) {
3084 return return_self(self);
3085 }
3086
3087 if (to_len == 0) {
3088 /* delete all occurances of 'from' string */
3089 if (from_len == 1) {
3090 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003091 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003092 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003093 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003094 }
3095 }
3096
3097 /* Handle special case where both strings have the same length */
3098
3099 if (from_len == to_len) {
3100 if (from_len == 1) {
3101 return replace_single_character_in_place(
3102 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003103 from_s[0],
3104 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003105 maxcount);
3106 } else {
3107 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003108 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003109 }
3110 }
3111
3112 /* Otherwise use the more generic algorithms */
3113 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003114 return replace_single_character(self, from_s[0],
3115 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003116 } else {
3117 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003118 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003119 }
3120}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003122PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003123"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003124\n\
3125Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003126old replaced by new. If the optional argument count is\n\
3127given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003128
3129static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003130string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003131{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003132 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003133 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003134 const char *from_s, *to_s;
3135 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003136
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003137 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003138 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003139
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003140 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003141 from_s = PyString_AS_STRING(from);
3142 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003144#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003145 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003146 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003147 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003148#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003149 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 return NULL;
3151
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003152 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003153 to_s = PyString_AS_STRING(to);
3154 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003156#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003157 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003158 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003159 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003160#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003161 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003162 return NULL;
3163
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003164 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003165 from_s, from_len,
3166 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003167}
3168
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003169/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003170
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003171/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003172 * against substr, using the start and end arguments. Returns
3173 * -1 on error, 0 if not found and 1 if found.
3174 */
3175Py_LOCAL(int)
3176_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3177 Py_ssize_t end, int direction)
3178{
3179 Py_ssize_t len = PyString_GET_SIZE(self);
3180 Py_ssize_t slen;
3181 const char* sub;
3182 const char* str;
3183
3184 if (PyString_Check(substr)) {
3185 sub = PyString_AS_STRING(substr);
3186 slen = PyString_GET_SIZE(substr);
3187 }
3188#ifdef Py_USING_UNICODE
3189 else if (PyUnicode_Check(substr))
3190 return PyUnicode_Tailmatch((PyObject *)self,
3191 substr, start, end, direction);
3192#endif
3193 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3194 return -1;
3195 str = PyString_AS_STRING(self);
3196
3197 string_adjust_indices(&start, &end, len);
3198
3199 if (direction < 0) {
3200 /* startswith */
3201 if (start+slen > len)
3202 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003203 } else {
3204 /* endswith */
3205 if (end-start < slen || start > len)
3206 return 0;
3207
3208 if (end-slen > start)
3209 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003210 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003211 if (end-start >= slen)
3212 return ! memcmp(str+start, sub, slen);
3213 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003214}
3215
3216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003217PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003218"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003219\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003220Return True if S starts with the specified prefix, False otherwise.\n\
3221With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003222With optional end, stop comparing S at that position.\n\
3223prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003224
3225static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003226string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003227{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003228 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003229 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003230 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003231 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003232
Guido van Rossumc6821402000-05-08 14:08:05 +00003233 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3234 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003235 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003236 if (PyTuple_Check(subobj)) {
3237 Py_ssize_t i;
3238 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3239 result = _string_tailmatch(self,
3240 PyTuple_GET_ITEM(subobj, i),
3241 start, end, -1);
3242 if (result == -1)
3243 return NULL;
3244 else if (result) {
3245 Py_RETURN_TRUE;
3246 }
3247 }
3248 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003249 }
Georg Brandl24250812006-06-09 18:45:48 +00003250 result = _string_tailmatch(self, subobj, start, end, -1);
3251 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003252 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003253 else
Georg Brandl24250812006-06-09 18:45:48 +00003254 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003255}
3256
3257
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003258PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003259"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003260\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003261Return True if S ends with the specified suffix, False otherwise.\n\
3262With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003263With optional end, stop comparing S at that position.\n\
3264suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003265
3266static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003267string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003268{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003269 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003270 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003271 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003272 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003273
Guido van Rossumc6821402000-05-08 14:08:05 +00003274 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3275 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003276 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003277 if (PyTuple_Check(subobj)) {
3278 Py_ssize_t i;
3279 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3280 result = _string_tailmatch(self,
3281 PyTuple_GET_ITEM(subobj, i),
3282 start, end, +1);
3283 if (result == -1)
3284 return NULL;
3285 else if (result) {
3286 Py_RETURN_TRUE;
3287 }
3288 }
3289 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003290 }
Georg Brandl24250812006-06-09 18:45:48 +00003291 result = _string_tailmatch(self, subobj, start, end, +1);
3292 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003293 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003294 else
Georg Brandl24250812006-06-09 18:45:48 +00003295 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003296}
3297
3298
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003299PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003300"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003301\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003302Encodes S using the codec registered for encoding. encoding defaults\n\
3303to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003304handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003305a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3306'xmlcharrefreplace' as well as any other name registered with\n\
3307codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003308
3309static PyObject *
3310string_encode(PyStringObject *self, PyObject *args)
3311{
3312 char *encoding = NULL;
3313 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003314 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003315
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003316 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3317 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003318 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003319 if (v == NULL)
3320 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003321 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3322 PyErr_Format(PyExc_TypeError,
3323 "encoder did not return a string/unicode object "
3324 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003325 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003326 Py_DECREF(v);
3327 return NULL;
3328 }
3329 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003330
3331 onError:
3332 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003333}
3334
3335
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003336PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003337"S.decode([encoding[,errors]]) -> object\n\
3338\n\
3339Decodes S using the codec registered for encoding. encoding defaults\n\
3340to the default encoding. errors may be given to set a different error\n\
3341handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003342a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3343as well as any other name registerd with codecs.register_error that is\n\
3344able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003345
3346static PyObject *
3347string_decode(PyStringObject *self, PyObject *args)
3348{
3349 char *encoding = NULL;
3350 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003351 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003352
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003353 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3354 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003355 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003356 if (v == NULL)
3357 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003358 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3359 PyErr_Format(PyExc_TypeError,
3360 "decoder did not return a string/unicode object "
3361 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003362 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003363 Py_DECREF(v);
3364 return NULL;
3365 }
3366 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003367
3368 onError:
3369 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003370}
3371
3372
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003373PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003374"S.expandtabs([tabsize]) -> string\n\
3375\n\
3376Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003377If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003378
3379static PyObject*
3380string_expandtabs(PyStringObject *self, PyObject *args)
3381{
Guido van Rossum5bdff602008-03-11 21:18:06 +00003382 const char *e, *p, *qe;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003383 char *q;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003384 Py_ssize_t i, j, incr;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003385 PyObject *u;
3386 int tabsize = 8;
3387
3388 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3389 return NULL;
3390
Thomas Wouters7e474022000-07-16 12:04:32 +00003391 /* First pass: determine size of output string */
Guido van Rossum5bdff602008-03-11 21:18:06 +00003392 i = 0; /* chars up to and including most recent \n or \r */
3393 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3394 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003395 for (p = PyString_AS_STRING(self); p < e; p++)
3396 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003397 if (tabsize > 0) {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003398 incr = tabsize - (j % tabsize);
3399 if (j > PY_SSIZE_T_MAX - incr)
3400 goto overflow1;
3401 j += incr;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003402 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003403 }
3404 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003405 if (j > PY_SSIZE_T_MAX - 1)
3406 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003407 j++;
3408 if (*p == '\n' || *p == '\r') {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003409 if (i > PY_SSIZE_T_MAX - j)
3410 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003411 i += j;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003412 j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003413 }
3414 }
3415
Guido van Rossum5bdff602008-03-11 21:18:06 +00003416 if (i > PY_SSIZE_T_MAX - j)
3417 goto overflow1;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003418
Guido van Rossum4c08d552000-03-10 22:55:18 +00003419 /* Second pass: create output string and fill it */
3420 u = PyString_FromStringAndSize(NULL, i + j);
3421 if (!u)
3422 return NULL;
3423
Guido van Rossum5bdff602008-03-11 21:18:06 +00003424 j = 0; /* same as in first pass */
3425 q = PyString_AS_STRING(u); /* next output char */
3426 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003427
3428 for (p = PyString_AS_STRING(self); p < e; p++)
3429 if (*p == '\t') {
3430 if (tabsize > 0) {
3431 i = tabsize - (j % tabsize);
3432 j += i;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003433 while (i--) {
3434 if (q >= qe)
3435 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003436 *q++ = ' ';
Guido van Rossum5bdff602008-03-11 21:18:06 +00003437 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003438 }
3439 }
3440 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003441 if (q >= qe)
3442 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003443 *q++ = *p;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003444 j++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445 if (*p == '\n' || *p == '\r')
3446 j = 0;
3447 }
3448
3449 return u;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003450
3451 overflow2:
3452 Py_DECREF(u);
3453 overflow1:
3454 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3455 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003456}
3457
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003458Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003459pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003460{
3461 PyObject *u;
3462
3463 if (left < 0)
3464 left = 0;
3465 if (right < 0)
3466 right = 0;
3467
Tim Peters8fa5dd02001-09-12 02:18:30 +00003468 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003469 Py_INCREF(self);
3470 return (PyObject *)self;
3471 }
3472
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003473 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003474 left + PyString_GET_SIZE(self) + right);
3475 if (u) {
3476 if (left)
3477 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003478 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003479 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003480 PyString_GET_SIZE(self));
3481 if (right)
3482 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3483 fill, right);
3484 }
3485
3486 return u;
3487}
3488
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003489PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003490"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003491"\n"
3492"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003493"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003494
3495static PyObject *
3496string_ljust(PyStringObject *self, PyObject *args)
3497{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003498 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003499 char fillchar = ' ';
3500
Thomas Wouters4abb3662006-04-19 14:50:15 +00003501 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003502 return NULL;
3503
Tim Peters8fa5dd02001-09-12 02:18:30 +00003504 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505 Py_INCREF(self);
3506 return (PyObject*) self;
3507 }
3508
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003509 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003510}
3511
3512
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003513PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003514"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003515"\n"
3516"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003517"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518
3519static PyObject *
3520string_rjust(PyStringObject *self, PyObject *args)
3521{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003522 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003523 char fillchar = ' ';
3524
Thomas Wouters4abb3662006-04-19 14:50:15 +00003525 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003526 return NULL;
3527
Tim Peters8fa5dd02001-09-12 02:18:30 +00003528 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529 Py_INCREF(self);
3530 return (PyObject*) self;
3531 }
3532
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003533 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534}
3535
3536
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003537PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003538"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003539"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003540"Return S centered in a string of length width. Padding is\n"
3541"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003542
3543static PyObject *
3544string_center(PyStringObject *self, PyObject *args)
3545{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003546 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003547 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003548 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003549
Thomas Wouters4abb3662006-04-19 14:50:15 +00003550 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003551 return NULL;
3552
Tim Peters8fa5dd02001-09-12 02:18:30 +00003553 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003554 Py_INCREF(self);
3555 return (PyObject*) self;
3556 }
3557
3558 marg = width - PyString_GET_SIZE(self);
3559 left = marg / 2 + (marg & width & 1);
3560
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003561 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003562}
3563
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003564PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003565"S.zfill(width) -> string\n"
3566"\n"
3567"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003568"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003569
3570static PyObject *
3571string_zfill(PyStringObject *self, PyObject *args)
3572{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003573 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003574 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003575 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003576 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003577
Thomas Wouters4abb3662006-04-19 14:50:15 +00003578 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003579 return NULL;
3580
3581 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003582 if (PyString_CheckExact(self)) {
3583 Py_INCREF(self);
3584 return (PyObject*) self;
3585 }
3586 else
3587 return PyString_FromStringAndSize(
3588 PyString_AS_STRING(self),
3589 PyString_GET_SIZE(self)
3590 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003591 }
3592
3593 fill = width - PyString_GET_SIZE(self);
3594
3595 s = pad(self, fill, 0, '0');
3596
3597 if (s == NULL)
3598 return NULL;
3599
3600 p = PyString_AS_STRING(s);
3601 if (p[fill] == '+' || p[fill] == '-') {
3602 /* move sign to beginning of string */
3603 p[0] = p[fill];
3604 p[fill] = '0';
3605 }
3606
3607 return (PyObject*) s;
3608}
3609
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003610PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003611"S.isspace() -> bool\n\
3612\n\
3613Return True if all characters in S are whitespace\n\
3614and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003615
3616static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003617string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003618{
Fred Drakeba096332000-07-09 07:04:36 +00003619 register const unsigned char *p
3620 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003621 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622
Guido van Rossum4c08d552000-03-10 22:55:18 +00003623 /* Shortcut for single character strings */
3624 if (PyString_GET_SIZE(self) == 1 &&
3625 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003626 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003628 /* Special case for empty strings */
3629 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003630 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003631
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632 e = p + PyString_GET_SIZE(self);
3633 for (; p < e; p++) {
3634 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003635 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003636 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638}
3639
3640
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003641PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003642"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003643\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003644Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003645and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003646
3647static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003648string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003649{
Fred Drakeba096332000-07-09 07:04:36 +00003650 register const unsigned char *p
3651 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003652 register const unsigned char *e;
3653
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003654 /* Shortcut for single character strings */
3655 if (PyString_GET_SIZE(self) == 1 &&
3656 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003657 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003658
3659 /* Special case for empty strings */
3660 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003662
3663 e = p + PyString_GET_SIZE(self);
3664 for (; p < e; p++) {
3665 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003666 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003667 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003668 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003669}
3670
3671
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003672PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003673"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003674\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003675Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003676and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003677
3678static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003679string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003680{
Fred Drakeba096332000-07-09 07:04:36 +00003681 register const unsigned char *p
3682 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003683 register const unsigned char *e;
3684
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003685 /* Shortcut for single character strings */
3686 if (PyString_GET_SIZE(self) == 1 &&
3687 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003689
3690 /* Special case for empty strings */
3691 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003692 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003693
3694 e = p + PyString_GET_SIZE(self);
3695 for (; p < e; p++) {
3696 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003697 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003698 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003699 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003700}
3701
3702
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003703PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003704"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003706Return True if all characters in S are digits\n\
3707and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708
3709static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003710string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711{
Fred Drakeba096332000-07-09 07:04:36 +00003712 register const unsigned char *p
3713 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003714 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716 /* Shortcut for single character strings */
3717 if (PyString_GET_SIZE(self) == 1 &&
3718 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003719 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003721 /* Special case for empty strings */
3722 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003723 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003724
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725 e = p + PyString_GET_SIZE(self);
3726 for (; p < e; p++) {
3727 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003728 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003730 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731}
3732
3733
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003734PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003735"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003737Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003738at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739
3740static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003741string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742{
Fred Drakeba096332000-07-09 07:04:36 +00003743 register const unsigned char *p
3744 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003745 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746 int cased;
3747
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748 /* Shortcut for single character strings */
3749 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003750 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003752 /* Special case for empty strings */
3753 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003755
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756 e = p + PyString_GET_SIZE(self);
3757 cased = 0;
3758 for (; p < e; p++) {
3759 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003760 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761 else if (!cased && islower(*p))
3762 cased = 1;
3763 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003764 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765}
3766
3767
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003768PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003769"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003771Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003772at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773
3774static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003775string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003776{
Fred Drakeba096332000-07-09 07:04:36 +00003777 register const unsigned char *p
3778 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003779 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003780 int cased;
3781
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782 /* Shortcut for single character strings */
3783 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003784 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003785
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003786 /* Special case for empty strings */
3787 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003788 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003789
Guido van Rossum4c08d552000-03-10 22:55:18 +00003790 e = p + PyString_GET_SIZE(self);
3791 cased = 0;
3792 for (; p < e; p++) {
3793 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003794 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003795 else if (!cased && isupper(*p))
3796 cased = 1;
3797 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003798 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003799}
3800
3801
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003802PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003803"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003805Return True if S is a titlecased string and there is at least one\n\
3806character in S, i.e. uppercase characters may only follow uncased\n\
3807characters and lowercase characters only cased ones. Return False\n\
3808otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809
3810static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003811string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003812{
Fred Drakeba096332000-07-09 07:04:36 +00003813 register const unsigned char *p
3814 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003815 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816 int cased, previous_is_cased;
3817
Guido van Rossum4c08d552000-03-10 22:55:18 +00003818 /* Shortcut for single character strings */
3819 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003820 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003821
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003822 /* Special case for empty strings */
3823 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003824 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003825
Guido van Rossum4c08d552000-03-10 22:55:18 +00003826 e = p + PyString_GET_SIZE(self);
3827 cased = 0;
3828 previous_is_cased = 0;
3829 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003830 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003831
3832 if (isupper(ch)) {
3833 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003834 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003835 previous_is_cased = 1;
3836 cased = 1;
3837 }
3838 else if (islower(ch)) {
3839 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003840 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003841 previous_is_cased = 1;
3842 cased = 1;
3843 }
3844 else
3845 previous_is_cased = 0;
3846 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003847 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003848}
3849
3850
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003851PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003852"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003853\n\
3854Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003855Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003856is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003857
Guido van Rossum4c08d552000-03-10 22:55:18 +00003858static PyObject*
3859string_splitlines(PyStringObject *self, PyObject *args)
3860{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003861 register Py_ssize_t i;
3862 register Py_ssize_t j;
3863 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003864 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003865 PyObject *list;
3866 PyObject *str;
3867 char *data;
3868
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003869 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003870 return NULL;
3871
3872 data = PyString_AS_STRING(self);
3873 len = PyString_GET_SIZE(self);
3874
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003875 /* This does not use the preallocated list because splitlines is
3876 usually run with hundreds of newlines. The overhead of
3877 switching between PyList_SET_ITEM and append causes about a
3878 2-3% slowdown for that common case. A smarter implementation
3879 could move the if check out, so the SET_ITEMs are done first
3880 and the appends only done when the prealloc buffer is full.
3881 That's too much work for little gain.*/
3882
Guido van Rossum4c08d552000-03-10 22:55:18 +00003883 list = PyList_New(0);
3884 if (!list)
3885 goto onError;
3886
3887 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003888 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003889
Guido van Rossum4c08d552000-03-10 22:55:18 +00003890 /* Find a line and append it */
3891 while (i < len && data[i] != '\n' && data[i] != '\r')
3892 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003893
3894 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003895 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003896 if (i < len) {
3897 if (data[i] == '\r' && i + 1 < len &&
3898 data[i+1] == '\n')
3899 i += 2;
3900 else
3901 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003902 if (keepends)
3903 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003904 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003905 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003906 j = i;
3907 }
3908 if (j < len) {
3909 SPLIT_APPEND(data, j, len);
3910 }
3911
3912 return list;
3913
3914 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003915 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003916 return NULL;
3917}
3918
3919#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003920#undef SPLIT_ADD
3921#undef MAX_PREALLOC
3922#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003923
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003924static PyObject *
3925string_getnewargs(PyStringObject *v)
3926{
Christian Heimese93237d2007-12-19 02:37:44 +00003927 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003928}
3929
Eric Smitha9f7d622008-02-17 19:46:49 +00003930
3931#include "stringlib/string_format.h"
3932
3933PyDoc_STRVAR(format__doc__,
3934"S.format(*args, **kwargs) -> unicode\n\
3935\n\
3936");
3937
3938PyDoc_STRVAR(p_format__doc__,
3939"S.__format__(format_spec) -> unicode\n\
3940\n\
3941");
3942
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003943
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003944static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003945string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003946 /* Counterparts of the obsolete stropmodule functions; except
3947 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003948 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3949 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003950 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003951 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3952 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003953 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3954 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3955 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3956 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3957 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3958 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3959 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003960 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3961 capitalize__doc__},
3962 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3963 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3964 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003965 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003966 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3967 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3968 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3969 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3970 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3971 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3972 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003973 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3974 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003975 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3976 startswith__doc__},
3977 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3978 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3979 swapcase__doc__},
3980 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3981 translate__doc__},
3982 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3983 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3984 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3985 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3986 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Eric Smitha9f7d622008-02-17 19:46:49 +00003987 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3988 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3989 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3990 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003991 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3992 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3993 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3994 expandtabs__doc__},
3995 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3996 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003997 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003998 {NULL, NULL} /* sentinel */
3999};
4000
Jeremy Hylton938ace62002-07-17 16:30:39 +00004001static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00004002str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4003
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004004static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00004005string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004006{
Tim Peters6d6c1a32001-08-02 04:15:00 +00004007 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00004008 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00004009
Guido van Rossumae960af2001-08-30 03:11:59 +00004010 if (type != &PyString_Type)
4011 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00004012 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4013 return NULL;
4014 if (x == NULL)
4015 return PyString_FromString("");
4016 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004017}
4018
Guido van Rossumae960af2001-08-30 03:11:59 +00004019static PyObject *
4020str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4021{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004022 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004023 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004024
4025 assert(PyType_IsSubtype(type, &PyString_Type));
4026 tmp = string_new(&PyString_Type, args, kwds);
4027 if (tmp == NULL)
4028 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004029 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004030 n = PyString_GET_SIZE(tmp);
4031 pnew = type->tp_alloc(type, n);
4032 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004033 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004034 ((PyStringObject *)pnew)->ob_shash =
4035 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004036 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004037 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004038 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004039 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004040}
4041
Guido van Rossumcacfc072002-05-24 19:01:59 +00004042static PyObject *
4043basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4044{
4045 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004046 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004047 return NULL;
4048}
4049
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004050static PyObject *
4051string_mod(PyObject *v, PyObject *w)
4052{
4053 if (!PyString_Check(v)) {
4054 Py_INCREF(Py_NotImplemented);
4055 return Py_NotImplemented;
4056 }
4057 return PyString_Format(v, w);
4058}
4059
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004060PyDoc_STRVAR(basestring_doc,
4061"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004062
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004063static PyNumberMethods string_as_number = {
4064 0, /*nb_add*/
4065 0, /*nb_subtract*/
4066 0, /*nb_multiply*/
4067 0, /*nb_divide*/
4068 string_mod, /*nb_remainder*/
4069};
4070
4071
Guido van Rossumcacfc072002-05-24 19:01:59 +00004072PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004073 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004074 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004075 0,
4076 0,
4077 0, /* tp_dealloc */
4078 0, /* tp_print */
4079 0, /* tp_getattr */
4080 0, /* tp_setattr */
4081 0, /* tp_compare */
4082 0, /* tp_repr */
4083 0, /* tp_as_number */
4084 0, /* tp_as_sequence */
4085 0, /* tp_as_mapping */
4086 0, /* tp_hash */
4087 0, /* tp_call */
4088 0, /* tp_str */
4089 0, /* tp_getattro */
4090 0, /* tp_setattro */
4091 0, /* tp_as_buffer */
4092 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4093 basestring_doc, /* tp_doc */
4094 0, /* tp_traverse */
4095 0, /* tp_clear */
4096 0, /* tp_richcompare */
4097 0, /* tp_weaklistoffset */
4098 0, /* tp_iter */
4099 0, /* tp_iternext */
4100 0, /* tp_methods */
4101 0, /* tp_members */
4102 0, /* tp_getset */
4103 &PyBaseObject_Type, /* tp_base */
4104 0, /* tp_dict */
4105 0, /* tp_descr_get */
4106 0, /* tp_descr_set */
4107 0, /* tp_dictoffset */
4108 0, /* tp_init */
4109 0, /* tp_alloc */
4110 basestring_new, /* tp_new */
4111 0, /* tp_free */
4112};
4113
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004114PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004115"str(object) -> string\n\
4116\n\
4117Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004118If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004119
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004120PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004121 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004122 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004123 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004124 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004125 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004126 (printfunc)string_print, /* tp_print */
4127 0, /* tp_getattr */
4128 0, /* tp_setattr */
4129 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004130 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004131 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004132 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004133 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004134 (hashfunc)string_hash, /* tp_hash */
4135 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004136 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004137 PyObject_GenericGetAttr, /* tp_getattro */
4138 0, /* tp_setattro */
4139 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004140 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Christian Heimes1a6387e2008-03-26 12:49:49 +00004141 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4142 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004143 string_doc, /* tp_doc */
4144 0, /* tp_traverse */
4145 0, /* tp_clear */
4146 (richcmpfunc)string_richcompare, /* tp_richcompare */
4147 0, /* tp_weaklistoffset */
4148 0, /* tp_iter */
4149 0, /* tp_iternext */
4150 string_methods, /* tp_methods */
4151 0, /* tp_members */
4152 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004153 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004154 0, /* tp_dict */
4155 0, /* tp_descr_get */
4156 0, /* tp_descr_set */
4157 0, /* tp_dictoffset */
4158 0, /* tp_init */
4159 0, /* tp_alloc */
4160 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004161 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004162};
4163
4164void
Fred Drakeba096332000-07-09 07:04:36 +00004165PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004166{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004167 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004168 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004169 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004170 if (w == NULL || !PyString_Check(*pv)) {
4171 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004172 *pv = NULL;
4173 return;
4174 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004175 v = string_concat((PyStringObject *) *pv, w);
4176 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004177 *pv = v;
4178}
4179
Guido van Rossum013142a1994-08-30 08:19:36 +00004180void
Fred Drakeba096332000-07-09 07:04:36 +00004181PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004182{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004183 PyString_Concat(pv, w);
4184 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004185}
4186
4187
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004188/* The following function breaks the notion that strings are immutable:
4189 it changes the size of a string. We get away with this only if there
4190 is only one module referencing the object. You can also think of it
4191 as creating a new string object and destroying the old one, only
4192 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004193 already be known to some other part of the code...
4194 Note that if there's not enough memory to resize the string, the original
4195 string object at *pv is deallocated, *pv is set to NULL, an "out of
4196 memory" exception is set, and -1 is returned. Else (on success) 0 is
4197 returned, and the value in *pv may or may not be the same as on input.
4198 As always, an extra byte is allocated for a trailing \0 byte (newsize
4199 does *not* include that), and a trailing \0 byte is stored.
4200*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004201
4202int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004203_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004204{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004205 register PyObject *v;
4206 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004207 v = *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004208 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004209 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004210 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004211 Py_DECREF(v);
4212 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004213 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004214 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004215 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004216 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004217 _Py_ForgetReference(v);
4218 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004219 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004220 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004221 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004222 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004223 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004224 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004225 _Py_NewReference(*pv);
4226 sv = (PyStringObject *) *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004227 Py_SIZE(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004228 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004229 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004230 return 0;
4231}
Guido van Rossume5372401993-03-16 12:15:04 +00004232
4233/* Helpers for formatstring */
4234
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004235Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004236getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004237{
Thomas Wouters977485d2006-02-16 15:59:12 +00004238 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004239 if (argidx < arglen) {
4240 (*p_argidx)++;
4241 if (arglen < 0)
4242 return args;
4243 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004244 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004245 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004246 PyErr_SetString(PyExc_TypeError,
4247 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004248 return NULL;
4249}
4250
/* Format flag bits, one per flag character of a conversion specifier:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4263
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004264Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004265formatfloat(char *buf, size_t buflen, int flags,
4266 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004267{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004268 /* fmt = '%#.' + `prec` + `type`
4269 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004270 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004271 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004272 x = PyFloat_AsDouble(v);
4273 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004274 PyErr_Format(PyExc_TypeError, "float argument required, "
Christian Heimese93237d2007-12-19 02:37:44 +00004275 "not %.200s", Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004276 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004277 }
Guido van Rossume5372401993-03-16 12:15:04 +00004278 if (prec < 0)
4279 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004280 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4281 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004282 /* Worst case length calc to ensure no buffer overrun:
4283
4284 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004285 fmt = %#.<prec>g
4286 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004287 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004288 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004289
4290 'f' formats:
4291 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4292 len = 1 + 50 + 1 + prec = 52 + prec
4293
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004294 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004295 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004296
4297 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004298 if (((type == 'g' || type == 'G') &&
4299 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004300 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004301 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004302 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004303 return -1;
4304 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004305 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4306 (flags&F_ALT) ? "#" : "",
4307 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004308 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004309 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004310}
4311
Tim Peters38fd5b62000-09-21 05:43:11 +00004312/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4313 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4314 * Python's regular ints.
4315 * Return value: a new PyString*, or NULL if error.
4316 * . *pbuf is set to point into it,
4317 * *plen set to the # of chars following that.
4318 * Caller must decref it when done using pbuf.
4319 * The string starting at *pbuf is of the form
4320 * "-"? ("0x" | "0X")? digit+
4321 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004322 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004323 * There will be at least prec digits, zero-filled on the left if
4324 * necessary to get that many.
4325 * val object to be converted
4326 * flags bitmask of format flags; only F_ALT is looked at
4327 * prec minimum number of digits; 0-fill on left if needed
4328 * type a character in [duoxX]; u acts the same as d
4329 *
4330 * CAUTION: o, x and X conversions on regular ints can never
4331 * produce a '-' sign, but can for Python's unbounded ints.
4332 */
4333PyObject*
4334_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4335 char **pbuf, int *plen)
4336{
4337 PyObject *result = NULL;
4338 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004339 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004340 int sign; /* 1 if '-', else 0 */
4341 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004342 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004343 int numdigits; /* len == numnondigits + numdigits */
4344 int numnondigits = 0;
4345
4346 switch (type) {
4347 case 'd':
4348 case 'u':
Christian Heimese93237d2007-12-19 02:37:44 +00004349 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004350 break;
4351 case 'o':
Christian Heimese93237d2007-12-19 02:37:44 +00004352 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004353 break;
4354 case 'x':
4355 case 'X':
4356 numnondigits = 2;
Christian Heimese93237d2007-12-19 02:37:44 +00004357 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004358 break;
4359 default:
4360 assert(!"'type' not in [duoxX]");
4361 }
4362 if (!result)
4363 return NULL;
4364
Neal Norwitz56423e52006-08-13 18:11:08 +00004365 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004366 if (!buf) {
4367 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004368 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004369 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004370
Tim Peters38fd5b62000-09-21 05:43:11 +00004371 /* To modify the string in-place, there can only be one reference. */
Christian Heimese93237d2007-12-19 02:37:44 +00004372 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004373 PyErr_BadInternalCall();
4374 return NULL;
4375 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004376 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004377 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004378 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4379 return NULL;
4380 }
4381 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004382 if (buf[len-1] == 'L') {
4383 --len;
4384 buf[len] = '\0';
4385 }
4386 sign = buf[0] == '-';
4387 numnondigits += sign;
4388 numdigits = len - numnondigits;
4389 assert(numdigits > 0);
4390
Tim Petersfff53252001-04-12 18:38:48 +00004391 /* Get rid of base marker unless F_ALT */
4392 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004393 /* Need to skip 0x, 0X or 0. */
4394 int skipped = 0;
4395 switch (type) {
4396 case 'o':
4397 assert(buf[sign] == '0');
4398 /* If 0 is only digit, leave it alone. */
4399 if (numdigits > 1) {
4400 skipped = 1;
4401 --numdigits;
4402 }
4403 break;
4404 case 'x':
4405 case 'X':
4406 assert(buf[sign] == '0');
4407 assert(buf[sign + 1] == 'x');
4408 skipped = 2;
4409 numnondigits -= 2;
4410 break;
4411 }
4412 if (skipped) {
4413 buf += skipped;
4414 len -= skipped;
4415 if (sign)
4416 buf[0] = '-';
4417 }
4418 assert(len == numnondigits + numdigits);
4419 assert(numdigits > 0);
4420 }
4421
4422 /* Fill with leading zeroes to meet minimum width. */
4423 if (prec > numdigits) {
4424 PyObject *r1 = PyString_FromStringAndSize(NULL,
4425 numnondigits + prec);
4426 char *b1;
4427 if (!r1) {
4428 Py_DECREF(result);
4429 return NULL;
4430 }
4431 b1 = PyString_AS_STRING(r1);
4432 for (i = 0; i < numnondigits; ++i)
4433 *b1++ = *buf++;
4434 for (i = 0; i < prec - numdigits; i++)
4435 *b1++ = '0';
4436 for (i = 0; i < numdigits; i++)
4437 *b1++ = *buf++;
4438 *b1 = '\0';
4439 Py_DECREF(result);
4440 result = r1;
4441 buf = PyString_AS_STRING(result);
4442 len = numnondigits + prec;
4443 }
4444
4445 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004446 if (type == 'X') {
4447 /* Need to convert all lower case letters to upper case.
4448 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004449 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004450 if (buf[i] >= 'a' && buf[i] <= 'x')
4451 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004452 }
4453 *pbuf = buf;
4454 *plen = len;
4455 return result;
4456}
4457
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004458Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004459formatint(char *buf, size_t buflen, int flags,
4460 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004461{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004462 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004463 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4464 + 1 + 1 = 24 */
4465 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004466 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004467 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004468
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004469 x = PyInt_AsLong(v);
4470 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004471 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00004472 Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004473 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004474 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004475 if (x < 0 && type == 'u') {
4476 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004477 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004478 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4479 sign = "-";
4480 else
4481 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004482 if (prec < 0)
4483 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004484
4485 if ((flags & F_ALT) &&
4486 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004487 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004488 * of issues that cause pain:
4489 * - when 0 is being converted, the C standard leaves off
4490 * the '0x' or '0X', which is inconsistent with other
4491 * %#x/%#X conversions and inconsistent with Python's
4492 * hex() function
4493 * - there are platforms that violate the standard and
4494 * convert 0 with the '0x' or '0X'
4495 * (Metrowerks, Compaq Tru64)
4496 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004497 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004498 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004499 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004500 * We can achieve the desired consistency by inserting our
4501 * own '0x' or '0X' prefix, and substituting %x/%X in place
4502 * of %#x/%#X.
4503 *
4504 * Note that this is the same approach as used in
4505 * formatint() in unicodeobject.c
4506 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004507 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4508 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004509 }
4510 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004511 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4512 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004513 prec, type);
4514 }
4515
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004516 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4517 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004518 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004519 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004520 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004521 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004522 return -1;
4523 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004524 if (sign[0])
4525 PyOS_snprintf(buf, buflen, fmt, -x);
4526 else
4527 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004528 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004529}
4530
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004531Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004532formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004533{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004534 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004535 if (PyString_Check(v)) {
4536 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004537 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004538 }
4539 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004540 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004541 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004542 }
4543 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004544 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004545}
4546
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004556
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004557PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004558PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004559{
4560 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004561 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004562 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004563 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004564 PyObject *result, *orig_args;
4565#ifdef Py_USING_UNICODE
4566 PyObject *v, *w;
4567#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004568 PyObject *dict = NULL;
4569 if (format == NULL || !PyString_Check(format) || args == NULL) {
4570 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004571 return NULL;
4572 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004573 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004574 fmt = PyString_AS_STRING(format);
4575 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004576 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004577 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004578 if (result == NULL)
4579 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004580 res = PyString_AsString(result);
4581 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004582 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004583 argidx = 0;
4584 }
4585 else {
4586 arglen = -1;
4587 argidx = -2;
4588 }
Christian Heimese93237d2007-12-19 02:37:44 +00004589 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004590 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004591 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004592 while (--fmtcnt >= 0) {
4593 if (*fmt != '%') {
4594 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004595 rescnt = fmtcnt + 100;
4596 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004597 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004598 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004599 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004600 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004601 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004602 }
4603 *res++ = *fmt++;
4604 }
4605 else {
4606 /* Got a format specifier */
4607 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004608 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004609 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004610 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004611 int fill;
Facundo Batistac11cecf2008-02-24 03:17:21 +00004612 int isnumok;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004613 PyObject *v = NULL;
4614 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004615 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004616 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004617 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004618 char formatbuf[FORMATBUFLEN];
4619 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004620#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004621 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004622 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004623#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004624
Guido van Rossumda9c2711996-12-05 21:58:58 +00004625 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004626 if (*fmt == '(') {
4627 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004628 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004629 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004630 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004631
4632 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004633 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004634 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004635 goto error;
4636 }
4637 ++fmt;
4638 --fmtcnt;
4639 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004640 /* Skip over balanced parentheses */
4641 while (pcount > 0 && --fmtcnt >= 0) {
4642 if (*fmt == ')')
4643 --pcount;
4644 else if (*fmt == '(')
4645 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004646 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004647 }
4648 keylen = fmt - keystart - 1;
4649 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004650 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004651 "incomplete format key");
4652 goto error;
4653 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004654 key = PyString_FromStringAndSize(keystart,
4655 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004656 if (key == NULL)
4657 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004658 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004659 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004660 args_owned = 0;
4661 }
4662 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004663 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004664 if (args == NULL) {
4665 goto error;
4666 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004667 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004668 arglen = -1;
4669 argidx = -2;
4670 }
Guido van Rossume5372401993-03-16 12:15:04 +00004671 while (--fmtcnt >= 0) {
4672 switch (c = *fmt++) {
4673 case '-': flags |= F_LJUST; continue;
4674 case '+': flags |= F_SIGN; continue;
4675 case ' ': flags |= F_BLANK; continue;
4676 case '#': flags |= F_ALT; continue;
4677 case '0': flags |= F_ZERO; continue;
4678 }
4679 break;
4680 }
4681 if (c == '*') {
4682 v = getnextarg(args, arglen, &argidx);
4683 if (v == NULL)
4684 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004685 if (!PyInt_Check(v)) {
4686 PyErr_SetString(PyExc_TypeError,
4687 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004688 goto error;
4689 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004690 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004691 if (width < 0) {
4692 flags |= F_LJUST;
4693 width = -width;
4694 }
Guido van Rossume5372401993-03-16 12:15:04 +00004695 if (--fmtcnt >= 0)
4696 c = *fmt++;
4697 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004698 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004699 width = c - '0';
4700 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004701 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004702 if (!isdigit(c))
4703 break;
4704 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004705 PyErr_SetString(
4706 PyExc_ValueError,
4707 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004708 goto error;
4709 }
4710 width = width*10 + (c - '0');
4711 }
4712 }
4713 if (c == '.') {
4714 prec = 0;
4715 if (--fmtcnt >= 0)
4716 c = *fmt++;
4717 if (c == '*') {
4718 v = getnextarg(args, arglen, &argidx);
4719 if (v == NULL)
4720 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004721 if (!PyInt_Check(v)) {
4722 PyErr_SetString(
4723 PyExc_TypeError,
4724 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004725 goto error;
4726 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004727 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004728 if (prec < 0)
4729 prec = 0;
4730 if (--fmtcnt >= 0)
4731 c = *fmt++;
4732 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004733 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004734 prec = c - '0';
4735 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004736 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004737 if (!isdigit(c))
4738 break;
4739 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004740 PyErr_SetString(
4741 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004742 "prec too big");
4743 goto error;
4744 }
4745 prec = prec*10 + (c - '0');
4746 }
4747 }
4748 } /* prec */
4749 if (fmtcnt >= 0) {
4750 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004751 if (--fmtcnt >= 0)
4752 c = *fmt++;
4753 }
4754 }
4755 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004756 PyErr_SetString(PyExc_ValueError,
4757 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004758 goto error;
4759 }
4760 if (c != '%') {
4761 v = getnextarg(args, arglen, &argidx);
4762 if (v == NULL)
4763 goto error;
4764 }
4765 sign = 0;
4766 fill = ' ';
4767 switch (c) {
4768 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004769 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004770 len = 1;
4771 break;
4772 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004773#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004774 if (PyUnicode_Check(v)) {
4775 fmt = fmt_start;
4776 argidx = argidx_start;
4777 goto unicode;
4778 }
Georg Brandld45014b2005-10-01 17:06:00 +00004779#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004780 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004781#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004782 if (temp != NULL && PyUnicode_Check(temp)) {
4783 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004784 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004785 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004786 goto unicode;
4787 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004788#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004789 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004790 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004791 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004792 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004793 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004794 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004795 if (!PyString_Check(temp)) {
4796 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004797 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004798 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004799 goto error;
4800 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004801 pbuf = PyString_AS_STRING(temp);
4802 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004803 if (prec >= 0 && len > prec)
4804 len = prec;
4805 break;
4806 case 'i':
4807 case 'd':
4808 case 'u':
4809 case 'o':
4810 case 'x':
4811 case 'X':
4812 if (c == 'i')
4813 c = 'd';
Facundo Batistac11cecf2008-02-24 03:17:21 +00004814 isnumok = 0;
4815 if (PyNumber_Check(v)) {
4816 PyObject *iobj=NULL;
4817
4818 if (PyInt_Check(v) || (PyLong_Check(v))) {
4819 iobj = v;
4820 Py_INCREF(iobj);
4821 }
4822 else {
4823 iobj = PyNumber_Int(v);
4824 if (iobj==NULL) iobj = PyNumber_Long(v);
4825 }
4826 if (iobj!=NULL) {
4827 if (PyInt_Check(iobj)) {
4828 isnumok = 1;
4829 pbuf = formatbuf;
4830 len = formatint(pbuf,
4831 sizeof(formatbuf),
4832 flags, prec, c, iobj);
4833 Py_DECREF(iobj);
4834 if (len < 0)
4835 goto error;
4836 sign = 1;
4837 }
4838 else if (PyLong_Check(iobj)) {
4839 int ilen;
4840
4841 isnumok = 1;
4842 temp = _PyString_FormatLong(iobj, flags,
4843 prec, c, &pbuf, &ilen);
4844 Py_DECREF(iobj);
4845 len = ilen;
4846 if (!temp)
4847 goto error;
4848 sign = 1;
4849 }
4850 else {
4851 Py_DECREF(iobj);
4852 }
4853 }
Guido van Rossum4acdc231997-01-29 06:00:24 +00004854 }
Facundo Batistac11cecf2008-02-24 03:17:21 +00004855 if (!isnumok) {
4856 PyErr_Format(PyExc_TypeError,
4857 "%%%c format: a number is required, "
4858 "not %.200s", c, Py_TYPE(v)->tp_name);
4859 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004860 }
4861 if (flags & F_ZERO)
4862 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004863 break;
4864 case 'e':
4865 case 'E':
4866 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004867 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004868 case 'g':
4869 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004870 if (c == 'F')
4871 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004872 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004873 len = formatfloat(pbuf, sizeof(formatbuf),
4874 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004875 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004876 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004877 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004878 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004879 fill = '0';
4880 break;
4881 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004882#ifdef Py_USING_UNICODE
4883 if (PyUnicode_Check(v)) {
4884 fmt = fmt_start;
4885 argidx = argidx_start;
4886 goto unicode;
4887 }
4888#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004889 pbuf = formatbuf;
4890 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004891 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004892 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004893 break;
4894 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004895 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004896 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004897 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004898 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004899 (Py_ssize_t)(fmt - 1 -
4900 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004901 goto error;
4902 }
4903 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004904 if (*pbuf == '-' || *pbuf == '+') {
4905 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004906 len--;
4907 }
4908 else if (flags & F_SIGN)
4909 sign = '+';
4910 else if (flags & F_BLANK)
4911 sign = ' ';
4912 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004913 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004914 }
4915 if (width < len)
4916 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004917 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004918 reslen -= rescnt;
4919 rescnt = width + fmtcnt + 100;
4920 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004921 if (reslen < 0) {
4922 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004923 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004924 return PyErr_NoMemory();
4925 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004926 if (_PyString_Resize(&result, reslen) < 0) {
4927 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004928 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004929 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004930 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004931 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004932 }
4933 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004934 if (fill != ' ')
4935 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004936 rescnt--;
4937 if (width > len)
4938 width--;
4939 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004940 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4941 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004942 assert(pbuf[1] == c);
4943 if (fill != ' ') {
4944 *res++ = *pbuf++;
4945 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004946 }
Tim Petersfff53252001-04-12 18:38:48 +00004947 rescnt -= 2;
4948 width -= 2;
4949 if (width < 0)
4950 width = 0;
4951 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004952 }
4953 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004954 do {
4955 --rescnt;
4956 *res++ = fill;
4957 } while (--width > len);
4958 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004959 if (fill == ' ') {
4960 if (sign)
4961 *res++ = sign;
4962 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004963 (c == 'x' || c == 'X')) {
4964 assert(pbuf[0] == '0');
4965 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004966 *res++ = *pbuf++;
4967 *res++ = *pbuf++;
4968 }
4969 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004970 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004971 res += len;
4972 rescnt -= len;
4973 while (--width >= len) {
4974 --rescnt;
4975 *res++ = ' ';
4976 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004977 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004978 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004979 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004980 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004981 goto error;
4982 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004983 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004984 } /* '%' */
4985 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004986 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004987 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004988 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004989 goto error;
4990 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004991 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004992 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004993 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004994 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004995 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004996
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004997#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004998 unicode:
4999 if (args_owned) {
5000 Py_DECREF(args);
5001 args_owned = 0;
5002 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00005003 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00005004 if (PyTuple_Check(orig_args) && argidx > 0) {
5005 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00005006 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00005007 v = PyTuple_New(n);
5008 if (v == NULL)
5009 goto error;
5010 while (--n >= 0) {
5011 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5012 Py_INCREF(w);
5013 PyTuple_SET_ITEM(v, n, w);
5014 }
5015 args = v;
5016 } else {
5017 Py_INCREF(orig_args);
5018 args = orig_args;
5019 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005020 args_owned = 1;
5021 /* Take what we have of the result and let the Unicode formatting
5022 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00005023 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005024 if (_PyString_Resize(&result, rescnt))
5025 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00005026 fmtcnt = PyString_GET_SIZE(format) - \
5027 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005028 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5029 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00005030 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005031 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00005032 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005033 if (v == NULL)
5034 goto error;
5035 /* Paste what we have (result) to what the Unicode formatting
5036 function returned (v) and return the result (or error) */
5037 w = PyUnicode_Concat(result, v);
5038 Py_DECREF(result);
5039 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00005040 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005041 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00005042#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00005043
Guido van Rossume5372401993-03-16 12:15:04 +00005044 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005045 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005046 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005047 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005048 }
Guido van Rossume5372401993-03-16 12:15:04 +00005049 return NULL;
5050}
Guido van Rossum2a61e741997-01-18 07:55:05 +00005051
Guido van Rossum2a61e741997-01-18 07:55:05 +00005052void
Fred Drakeba096332000-07-09 07:04:36 +00005053PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005054{
5055 register PyStringObject *s = (PyStringObject *)(*p);
5056 PyObject *t;
5057 if (s == NULL || !PyString_Check(s))
5058 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005059 /* If it's a string subclass, we don't really know what putting
5060 it in the interned dict might do. */
5061 if (!PyString_CheckExact(s))
5062 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005063 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005064 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005065 if (interned == NULL) {
5066 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005067 if (interned == NULL) {
5068 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005069 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005070 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005071 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005072 t = PyDict_GetItem(interned, (PyObject *)s);
5073 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005074 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005075 Py_DECREF(*p);
5076 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005077 return;
5078 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005079
Armin Rigo79f7ad22004-08-07 19:27:39 +00005080 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005081 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005082 return;
5083 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005084 /* The two references in interned are not counted by refcnt.
5085 The string deallocator will take care of this */
Christian Heimese93237d2007-12-19 02:37:44 +00005086 Py_REFCNT(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005087 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005088}
5089
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005090void
5091PyString_InternImmortal(PyObject **p)
5092{
5093 PyString_InternInPlace(p);
5094 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5095 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5096 Py_INCREF(*p);
5097 }
5098}
5099
Guido van Rossum2a61e741997-01-18 07:55:05 +00005100
5101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005102PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005103{
5104 PyObject *s = PyString_FromString(cp);
5105 if (s == NULL)
5106 return NULL;
5107 PyString_InternInPlace(&s);
5108 return s;
5109}
5110
Guido van Rossum8cf04761997-08-02 02:57:45 +00005111void
Fred Drakeba096332000-07-09 07:04:36 +00005112PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005113{
5114 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005115 for (i = 0; i < UCHAR_MAX + 1; i++) {
5116 Py_XDECREF(characters[i]);
5117 characters[i] = NULL;
5118 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005119 Py_XDECREF(nullstring);
5120 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005121}
Barry Warsawa903ad982001-02-23 16:40:48 +00005122
Barry Warsawa903ad982001-02-23 16:40:48 +00005123void _Py_ReleaseInternedStrings(void)
5124{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005125 PyObject *keys;
5126 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005127 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005128 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005129
5130 if (interned == NULL || !PyDict_Check(interned))
5131 return;
5132 keys = PyDict_Keys(interned);
5133 if (keys == NULL || !PyList_Check(keys)) {
5134 PyErr_Clear();
5135 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005136 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005137
5138 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5139 detector, interned strings are not forcibly deallocated; rather, we
5140 give them their stolen references back, and then clear and DECREF
5141 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005142
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005143 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005144 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5145 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005146 for (i = 0; i < n; i++) {
5147 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5148 switch (s->ob_sstate) {
5149 case SSTATE_NOT_INTERNED:
5150 /* XXX Shouldn't happen */
5151 break;
5152 case SSTATE_INTERNED_IMMORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005153 Py_REFCNT(s) += 1;
5154 immortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005155 break;
5156 case SSTATE_INTERNED_MORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005157 Py_REFCNT(s) += 2;
5158 mortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005159 break;
5160 default:
5161 Py_FatalError("Inconsistent interned string state.");
5162 }
5163 s->ob_sstate = SSTATE_NOT_INTERNED;
5164 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005165 fprintf(stderr, "total size of all interned strings: "
5166 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5167 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005168 Py_DECREF(keys);
5169 PyDict_Clear(interned);
5170 Py_DECREF(interned);
5171 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005172}